From 8832a626d8a97466be6a0efd170aaf6c9e373a2d Mon Sep 17 00:00:00 2001 From: Naren Krishna Date: Wed, 24 Apr 2024 15:36:35 -0700 Subject: [PATCH] SNOW-1300434: Merge Snowpark pandas back to Snowpark Python (#1389) * SNOW-1300434: Merge Snowpark pandas back to Snowpark Python Signed-off-by: Naren Krishna * forgot to add precommit changes Signed-off-by: Naren Krishna * Get back to only Snowpark Python tests Signed-off-by: Naren Krishna * fix tox env and add path to pandas changelog Signed-off-by: Naren Krishna * remove unnecessary changes Signed-off-by: Naren Krishna * remove checkprs Signed-off-by: Naren Krishna * add back file to pre-commit-config Signed-off-by: Naren Krishna * fix rebase issue Signed-off-by: Naren Krishna * Add Snowpark pandas tests Signed-off-by: Naren Krishna * add iris.csv to tests/resources Signed-off-by: Naren Krishna * revert daily precommit changes Signed-off-by: Naren Krishna * fix test_utils_suite to add iris.csv Signed-off-by: Naren Krishna * fix Snowpark pandas test failures Signed-off-by: Naren Krishna * fix lint Signed-off-by: Naren Krishna * Update to pandas 2.2.1 and fix some more tests Signed-off-by: Naren Krishna * fix more tests Signed-off-by: Naren Krishna * update conftest Signed-off-by: Naren Krishna * fix upload name and remove pandas 2.2.1 from requirements Signed-off-by: Naren Krishna * fix doctest and read_json_empty error Signed-off-by: Naren Krishna * address some comments Signed-off-by: Naren Krishna * remove exclued files from precommit Signed-off-by: Naren Krishna * xfail test_read_json_empty_file Signed-off-by: Naren Krishna * restore dataframe reader Signed-off-by: Naren Krishna * update tox.ini Signed-off-by: Naren Krishna * fix test Signed-off-by: Naren Krishna * snowpark_package_to_sproc_packages change Signed-off-by: Naren Krishna * Snowpandas changes including commit 1008 Signed-off-by: Naren Krishna * update query count Signed-off-by: Naren Krishna * Pull in Modin dependency PR Signed-off-by: Naren Krishna * add Varnika's binary op PR Signed-off-by: Naren Krishna --------- Signed-off-by: Naren Krishna --- .github/CODEOWNERS | 3 + .../workflows/changedoc_snowpark_pandas.yml | 22 + .../workflows/changelog_snowpark_pandas.yml | 22 + .github/workflows/precommit.yml | 111 +- LICENSE.txt | 2 +- ...ion_snowpark_pandas_telemetry_decorator.py | 77 + setup.py | 29 + .../snowpark/_internal/open_telemetry.py | 2 +- src/snowflake/snowpark/dataframe.py | 90 + src/snowflake/snowpark/modin/NOTICE | 249 + src/snowflake/snowpark/modin/__init__.py | 3 + .../snowpark/modin/config/__init__.py | 119 + .../snowpark/modin/config/__main__.py | 108 + .../snowpark/modin/config/envvars.py | 913 ++ src/snowflake/snowpark/modin/config/pubsub.py | 447 + src/snowflake/snowpark/modin/conftest.py | 19 + src/snowflake/snowpark/modin/core/__init__.py | 3 + .../snowpark/modin/core/dataframe/__init__.py | 3 + .../modin/core/dataframe/algebra/__init__.py | 3 + .../algebra/default2pandas/__init__.py | 66 + .../algebra/default2pandas/binary.py | 78 + .../dataframe/algebra/default2pandas/cat.py | 48 + .../algebra/default2pandas/dataframe.py | 36 + .../algebra/default2pandas/datetime.py | 48 + .../algebra/default2pandas/default.py | 279 + .../algebra/default2pandas/groupby.py | 728 + .../algebra/default2pandas/resample.py | 106 + .../algebra/default2pandas/rolling.py | 159 + .../algebra/default2pandas/series.py | 49 + .../dataframe/algebra/default2pandas/str.py | 48 + .../snowpark/modin/core/execution/__init__.py | 3 + .../core/execution/dispatching/__init__.py | 22 + 
.../dispatching/factories/__init__.py | 26 + .../execution/dispatching/factories/baseio.py | 680 + .../dispatching/factories/dispatcher.py | 287 + .../dispatching/factories/factories.py | 466 + .../dispatching/factories/pandasframe.py | 3491 +++++ .../snowpark/modin/pandas/__init__.py | 348 + .../snowpark/modin/pandas/accessor.py | 201 + .../snowpark/modin/pandas/api/__init__.py | 24 + .../modin/pandas/api/extensions/__init__.py | 32 + .../modin/pandas/api/extensions/extensions.py | 186 + src/snowflake/snowpark/modin/pandas/base.py | 4184 ++++++ .../snowpark/modin/pandas/dataframe.py | 3445 +++++ .../snowpark/modin/pandas/general.py | 2387 +++ .../snowpark/modin/pandas/groupby.py | 1209 ++ .../snowpark/modin/pandas/indexing.py | 1337 ++ src/snowflake/snowpark/modin/pandas/io.py | 1163 ++ .../snowpark/modin/pandas/iterator.py | 82 + .../snowpark/modin/pandas/plotting.py | 84 + .../snowpark/modin/pandas/resample.py | 510 + src/snowflake/snowpark/modin/pandas/series.py | 2723 ++++ .../snowpark/modin/pandas/series_utils.py | 1517 ++ .../snowpark/modin/pandas/shared_docs.py | 62 + .../modin/pandas/snow_partition_iterator.py | 111 + src/snowflake/snowpark/modin/pandas/utils.py | 740 + src/snowflake/snowpark/modin/pandas/window.py | 463 + .../snowpark/modin/plugin/PANDAS_CHANGELOG.md | 312 + src/snowflake/snowpark/modin/plugin/README.md | 108 + .../snowpark/modin/plugin/__init__.py | 30 + .../modin/plugin/_internal/__init__.py | 3 + .../plugin/_internal/aggregation_utils.py | 1073 ++ .../modin/plugin/_internal/apply_utils.py | 1258 ++ .../modin/plugin/_internal/binary_op_utils.py | 540 + .../modin/plugin/_internal/concat_utils.py | 347 + .../plugin/_internal/cumulative_utils.py | 201 + .../modin/plugin/_internal/cut_utils.py | 314 + .../snowpark/modin/plugin/_internal/frame.py | 1196 ++ .../modin/plugin/_internal/generator_utils.py | 229 + .../modin/plugin/_internal/groupby_utils.py | 515 + .../modin/plugin/_internal/indexing_utils.py | 3201 ++++ .../modin/plugin/_internal/io_utils.py | 161 + .../modin/plugin/_internal/isin_utils.py | 306 + .../modin/plugin/_internal/join_utils.py | 1268 ++ .../plugin/_internal/ordered_dataframe.py | 1894 +++ .../modin/plugin/_internal/pivot_utils.py | 1413 ++ .../modin/plugin/_internal/resample_utils.py | 747 + .../modin/plugin/_internal/session.py | 97 + .../_internal/statement_params_constants.py | 22 + .../modin/plugin/_internal/telemetry.py | 494 + .../modin/plugin/_internal/timestamp_utils.py | 384 + .../modin/plugin/_internal/transpose_utils.py | 296 + .../modin/plugin/_internal/type_utils.py | 429 + .../modin/plugin/_internal/unpivot_utils.py | 897 ++ .../snowpark/modin/plugin/_internal/utils.py | 1745 +++ .../modin/plugin/_internal/where_utils.py | 32 + .../modin/plugin/_internal/window_utils.py | 101 + .../snowpark/modin/plugin/_typing.py | 89 + .../modin/plugin/compiler/__init__.py | 26 + .../modin/plugin/compiler/doc_utils.py | 696 + .../modin/plugin/compiler/query_compiler.py | 4014 +++++ .../compiler/snowflake_query_compiler.py | 12396 ++++++++++++++++ .../modin/plugin/default2pandas/__init__.py | 3 + .../default2pandas/stored_procedure_utils.py | 468 + .../modin/plugin/dev-requirements.txt | 2 + .../modin/plugin/docstrings/__init__.py | 25 + .../snowpark/modin/plugin/docstrings/base.py | 3248 ++++ .../modin/plugin/docstrings/dataframe.py | 3971 +++++ .../modin/plugin/docstrings/groupby.py | 1843 +++ .../modin/plugin/docstrings/resample.py | 957 ++ .../modin/plugin/docstrings/series.py | 3077 ++++ .../modin/plugin/docstrings/window.py | 454 + 
.../modin/plugin/extensions/__init__.py | 3 + .../plugin/extensions/dataframe_extensions.py | 240 + .../plugin/extensions/dataframe_overrides.py | 130 + .../modin/plugin/extensions/pd_extensions.py | 602 + .../modin/plugin/extensions/pd_overrides.py | 655 + .../plugin/extensions/series_extensions.py | 205 + .../plugin/extensions/series_overrides.py | 178 + .../snowpark/modin/plugin/io/__init__.py | 3 + .../snowpark/modin/plugin/io/snow_io.py | 638 + .../snowpark/modin/plugin/utils/__init__.py | 3 + .../modin/plugin/utils/error_message.py | 76 + .../snowpark/modin/plugin/utils/exceptions.py | 23 + .../modin/plugin/utils/numpy_to_pandas.py | 151 + .../modin/plugin/utils/warning_message.py | 105 + src/snowflake/snowpark/modin/utils.py | 1212 ++ tests/conftest.py | 21 + tests/integ/modin/__init__.py | 3 + tests/integ/modin/binary/__init__.py | 3 + .../binary/test_binary_default2pandas.py | 81 + tests/integ/modin/binary/test_binary_op.py | 2519 ++++ tests/integ/modin/conftest.py | 696 + tests/integ/modin/data.py | 24 + tests/integ/modin/extensions/__init__.py | 3 + .../extensions/test_dataframe_extensions.py | 94 + .../extensions/test_series_extensions.py | 91 + tests/integ/modin/frame/__init__.py | 3 + tests/integ/modin/frame/conftest.py | 183 + tests/integ/modin/frame/test_add_prefix.py | 86 + tests/integ/modin/frame/test_add_suffix.py | 86 + tests/integ/modin/frame/test_aggregate.py | 742 + tests/integ/modin/frame/test_all_any.py | 361 + tests/integ/modin/frame/test_apply.py | 1034 ++ tests/integ/modin/frame/test_applymap.py | 185 + tests/integ/modin/frame/test_astype.py | 94 + tests/integ/modin/frame/test_axis.py | 938 ++ tests/integ/modin/frame/test_copy.py | 88 + tests/integ/modin/frame/test_cumulative.py | 54 + tests/integ/modin/frame/test_describe.py | 340 + tests/integ/modin/frame/test_diff.py | 309 + tests/integ/modin/frame/test_drop.py | 367 + .../integ/modin/frame/test_drop_duplicates.py | 77 + tests/integ/modin/frame/test_dropna.py | 196 + tests/integ/modin/frame/test_dtypes.py | 535 + tests/integ/modin/frame/test_duplicated.py | 91 + tests/integ/modin/frame/test_empty.py | 41 + tests/integ/modin/frame/test_ffill.py | 49 + tests/integ/modin/frame/test_fillna.py | 655 + tests/integ/modin/frame/test_filter.py | 154 + .../frame/test_first_last_valid_index.py | 129 + tests/integ/modin/frame/test_getattr.py | 92 + tests/integ/modin/frame/test_getitem.py | 390 + tests/integ/modin/frame/test_head_tail.py | 73 + tests/integ/modin/frame/test_idxmax_idxmin.py | 267 + tests/integ/modin/frame/test_iloc.py | 3152 ++++ tests/integ/modin/frame/test_info.py | 115 + tests/integ/modin/frame/test_insert.py | 761 + tests/integ/modin/frame/test_isin.py | 250 + tests/integ/modin/frame/test_isna.py | 111 + tests/integ/modin/frame/test_iterrows.py | 103 + tests/integ/modin/frame/test_itertuples.py | 170 + tests/integ/modin/frame/test_join.py | 280 + tests/integ/modin/frame/test_len.py | 26 + tests/integ/modin/frame/test_loc.py | 3800 +++++ tests/integ/modin/frame/test_mask.py | 957 ++ tests/integ/modin/frame/test_melt.py | 305 + tests/integ/modin/frame/test_memory_usage.py | 19 + tests/integ/modin/frame/test_merge.py | 1260 ++ tests/integ/modin/frame/test_name.py | 46 + tests/integ/modin/frame/test_ndim.py | 33 + tests/integ/modin/frame/test_nunique.py | 125 + tests/integ/modin/frame/test_quantile.py | 113 + tests/integ/modin/frame/test_rank.py | 143 + tests/integ/modin/frame/test_rename.py | 521 + tests/integ/modin/frame/test_replace.py | 210 + tests/integ/modin/frame/test_repr.py | 233 + 
tests/integ/modin/frame/test_reset_index.py | 598 + tests/integ/modin/frame/test_round.py | 172 + tests/integ/modin/frame/test_sample.py | 58 + tests/integ/modin/frame/test_select_dtypes.py | 164 + tests/integ/modin/frame/test_set_index.py | 566 + tests/integ/modin/frame/test_setitem.py | 1541 ++ tests/integ/modin/frame/test_shape.py | 48 + tests/integ/modin/frame/test_shift.py | 98 + tests/integ/modin/frame/test_size.py | 51 + tests/integ/modin/frame/test_skew.py | 120 + tests/integ/modin/frame/test_sort_index.py | 64 + tests/integ/modin/frame/test_sort_values.py | 463 + tests/integ/modin/frame/test_squeeze.py | 69 + tests/integ/modin/frame/test_take.py | 60 + tests/integ/modin/frame/test_to_snowflake.py | 225 + tests/integ/modin/frame/test_to_snowpark.py | 204 + tests/integ/modin/frame/test_transpose.py | 350 + tests/integ/modin/frame/test_unary_op.py | 147 + tests/integ/modin/frame/test_value_counts.py | 195 + tests/integ/modin/frame/test_where.py | 993 ++ tests/integ/modin/groupby/__init__.py | 3 + tests/integ/modin/groupby/conftest.py | 233 + .../integ/modin/groupby/test_groupby_apply.py | 1031 ++ .../modin/groupby/test_groupby_basic_agg.py | 914 ++ .../test_groupby_dataframe_cumulative.py | 278 + .../groupby/test_groupby_dataframe_rank.py | 356 + .../groupby/test_groupby_dataframe_shift.py | 145 + .../groupby/test_groupby_default2pandas.py | 349 + .../modin/groupby/test_groupby_head_tail.py | 159 + .../groupby/test_groupby_idxmax_idxmin.py | 160 + .../modin/groupby/test_groupby_negative.py | 510 + .../modin/groupby/test_groupby_ngroups.py | 142 + .../modin/groupby/test_groupby_nunique.py | 82 + .../modin/groupby/test_groupby_property.py | 347 + .../modin/groupby/test_groupby_series.py | 114 + .../groupby/test_groupby_series_cumulative.py | 52 + .../modin/groupby/test_groupby_series_rank.py | 183 + .../groupby/test_groupby_series_shift.py | 58 + .../modin/groupby/test_groupby_transform.py | 100 + tests/integ/modin/groupby/test_grouping.py | 212 + tests/integ/modin/groupby/test_min_max.py | 177 + tests/integ/modin/groupby/test_quantile.py | 362 + tests/integ/modin/io/test_read_csv.py | 777 + tests/integ/modin/io/test_read_json.py | 373 + tests/integ/modin/io/test_read_parquet.py | 195 + tests/integ/modin/io/test_read_snowflake.py | 492 + .../io/test_read_snowflake_query_call.py | 50 + .../modin/io/test_read_snowflake_query_cte.py | 195 + .../io/test_read_snowflake_query_order_by.py | 246 + .../io/test_read_snowflake_select_query.py | 393 + tests/integ/modin/io/test_to_dict.py | 58 + tests/integ/modin/io/test_to_pandas.py | 27 + tests/integ/modin/pandas_api_coverage.py | 194 + tests/integ/modin/pivot/__init__.py | 3 + tests/integ/modin/pivot/conftest.py | 312 + tests/integ/modin/pivot/pivot_utils.py | 109 + tests/integ/modin/pivot/test_pivot_dropna.py | 180 + .../modin/pivot/test_pivot_fill_value.py | 177 + tests/integ/modin/pivot/test_pivot_margins.py | 186 + .../integ/modin/pivot/test_pivot_multiple.py | 279 + .../integ/modin/pivot/test_pivot_negative.py | 192 + tests/integ/modin/pivot/test_pivot_single.py | 209 + tests/integ/modin/pivot/test_pivot_utils.py | 66 + tests/integ/modin/resample/__init__.py | 3 + tests/integ/modin/resample/test_resample.py | 461 + .../modin/resample/test_resample_negative.py | 106 + tests/integ/modin/series/__init__.py | 3 + tests/integ/modin/series/conftest.py | 120 + tests/integ/modin/series/test_add_prefix.py | 88 + tests/integ/modin/series/test_add_suffix.py | 88 + tests/integ/modin/series/test_aggregate.py | 321 + 
tests/integ/modin/series/test_all_any.py | 183 + tests/integ/modin/series/test_apply.py | 667 + tests/integ/modin/series/test_astype.py | 450 + tests/integ/modin/series/test_axis.py | 487 + .../modin/series/test_bitwise_operators.py | 330 + .../integ/modin/series/test_convert_dtype.py | 16 + tests/integ/modin/series/test_copy.py | 79 + tests/integ/modin/series/test_cumulative.py | 34 + tests/integ/modin/series/test_describe.py | 160 + tests/integ/modin/series/test_diff.py | 159 + .../modin/series/test_drop_duplicates.py | 62 + tests/integ/modin/series/test_dropna.py | 111 + tests/integ/modin/series/test_dt_accessor.py | 116 + .../series/test_dt_accessor_unsupported.py | 102 + tests/integ/modin/series/test_duplicated.py | 44 + tests/integ/modin/series/test_empty.py | 56 + tests/integ/modin/series/test_ffill.py | 33 + tests/integ/modin/series/test_fillna.py | 172 + .../series/test_first_last_valid_index.py | 111 + tests/integ/modin/series/test_getattr.py | 62 + tests/integ/modin/series/test_getitem.py | 203 + tests/integ/modin/series/test_head_tail.py | 52 + .../integ/modin/series/test_idxmax_idxmin.py | 61 + tests/integ/modin/series/test_iloc.py | 937 ++ tests/integ/modin/series/test_isin.py | 178 + tests/integ/modin/series/test_isna.py | 117 + tests/integ/modin/series/test_len.py | 26 + tests/integ/modin/series/test_loc.py | 1759 +++ tests/integ/modin/series/test_mask.py | 308 + tests/integ/modin/series/test_name.py | 74 + tests/integ/modin/series/test_ndim.py | 33 + tests/integ/modin/series/test_nunique.py | 73 + tests/integ/modin/series/test_quantile.py | 162 + tests/integ/modin/series/test_rank.py | 107 + tests/integ/modin/series/test_rename.py | 219 + tests/integ/modin/series/test_replace.py | 203 + tests/integ/modin/series/test_repr.py | 50 + tests/integ/modin/series/test_round.py | 131 + tests/integ/modin/series/test_sample.py | 87 + tests/integ/modin/series/test_setitem.py | 2445 +++ tests/integ/modin/series/test_shape.py | 44 + tests/integ/modin/series/test_shift.py | 50 + tests/integ/modin/series/test_size.py | 46 + tests/integ/modin/series/test_sort_index.py | 51 + tests/integ/modin/series/test_sort_values.py | 192 + tests/integ/modin/series/test_squeeze.py | 51 + tests/integ/modin/series/test_str_accessor.py | 445 + tests/integ/modin/series/test_take.py | 35 + tests/integ/modin/series/test_to_snowflake.py | 143 + tests/integ/modin/series/test_to_snowpark.py | 88 + tests/integ/modin/series/test_transpose.py | 81 + tests/integ/modin/series/test_unary_op.py | 136 + tests/integ/modin/series/test_unique.py | 76 + tests/integ/modin/series/test_value_counts.py | 81 + tests/integ/modin/series/test_where.py | 309 + tests/integ/modin/sql_counter.py | 637 + tests/integ/modin/strings/__init__.py | 3 + .../integ/modin/strings/test_case_justify.py | 34 + tests/integ/modin/strings/test_cat.py | 99 + tests/integ/modin/strings/test_extract.py | 160 + tests/integ/modin/strings/test_get_dummies.py | 78 + .../strings/test_get_dummies_dataframe.py | 224 + .../modin/strings/test_get_dummies_series.py | 54 + tests/integ/modin/strings/test_strings.py | 602 + tests/integ/modin/test_classes.py | 90 + tests/integ/modin/test_concat.py | 1043 ++ tests/integ/modin/test_cut.py | 173 + tests/integ/modin/test_default2pandas.py | 410 + .../integ/modin/test_df_to_snowpark_pandas.py | 125 + tests/integ/modin/test_dtype_mapping.py | 490 + .../integ/modin/test_from_pandas_to_pandas.py | 623 + tests/integ/modin/test_internal_frame.py | 51 + tests/integ/modin/test_merge.py | 108 + tests/integ/modin/test_numpy.py | 239 
+ tests/integ/modin/test_ordered_dataframe.py | 1082 ++ tests/integ/modin/test_qcut.py | 262 + tests/integ/modin/test_session.py | 262 + tests/integ/modin/test_sql_counter.py | 276 + tests/integ/modin/test_telemetry.py | 590 + tests/integ/modin/test_to_numpy.py | 152 + tests/integ/modin/test_unique.py | 275 + tests/integ/modin/test_utils.py | 37 + tests/integ/modin/tools/__init__.py | 3 + tests/integ/modin/tools/test_date_range.py | 236 + tests/integ/modin/tools/test_to_datetime.py | 940 ++ tests/integ/modin/tools/test_to_numeric.py | 390 + tests/integ/modin/utils.py | 753 + tests/integ/modin/window/test_rolling.py | 241 + tests/resources/iris.csv | 151 + tests/unit/modin/__init__.py | 3 + tests/unit/modin/conftest.py | 52 + tests/unit/modin/default2pandas/__init__.py | 3 + .../test_stored_procedure_utils.py | 297 + tests/unit/modin/extensions/__init__.py | 16 + .../modin/extensions/test_pd_extensions.py | 43 + tests/unit/modin/modin/__init__.py | 16 + .../unit/modin/modin/docs_module/__init__.py | 21 + tests/unit/modin/modin/docs_module/classes.py | 42 + .../unit/modin/modin/docs_module/functions.py | 21 + .../docs_module_with_just_base/__init__.py | 20 + .../docs_module_with_just_base/classes.py | 21 + tests/unit/modin/modin/test_envvars.py | 371 + tests/unit/modin/modin/test_parameter.py | 107 + tests/unit/modin/test_aggregation_utils.py | 105 + tests/unit/modin/test_apply_utils.py | 96 + tests/unit/modin/test_binary_op_utils.py | 21 + tests/unit/modin/test_class.py | 75 + tests/unit/modin/test_docstrings.py | 11 + tests/unit/modin/test_frontend_utils.py | 104 + tests/unit/modin/test_groupby_unsupported.py | 109 + tests/unit/modin/test_groupby_utils.py | 96 + tests/unit/modin/test_internal_frame.py | 701 + tests/unit/modin/test_io.py | 28 + tests/unit/modin/test_ordered_dataframe.py | 139 + tests/unit/modin/test_pandas_module.py | 42 + tests/unit/modin/test_python_version.py | 26 + tests/unit/modin/test_series_cat.py | 53 + tests/unit/modin/test_series_dt.py | 179 + tests/unit/modin/test_series_strings.py | 182 + .../modin/test_snowflake_query_compiler.py | 64 + tests/unit/modin/test_telemetry.py | 228 + tests/unit/modin/test_test_utils.py | 186 + tests/unit/modin/test_unsupported.py | 271 + tests/unit/modin/test_utils.py | 949 ++ tests/unit/scala/test_utils_suite.py | 1 + tests/utils.py | 4 + tox.ini | 15 + 375 files changed, 154797 insertions(+), 3 deletions(-) create mode 100644 .github/workflows/changedoc_snowpark_pandas.yml create mode 100644 .github/workflows/changelog_snowpark_pandas.yml create mode 100644 ci/check_standalone_function_snowpark_pandas_telemetry_decorator.py create mode 100644 src/snowflake/snowpark/modin/NOTICE create mode 100644 src/snowflake/snowpark/modin/__init__.py create mode 100644 src/snowflake/snowpark/modin/config/__init__.py create mode 100644 src/snowflake/snowpark/modin/config/__main__.py create mode 100644 src/snowflake/snowpark/modin/config/envvars.py create mode 100644 src/snowflake/snowpark/modin/config/pubsub.py create mode 100644 src/snowflake/snowpark/modin/conftest.py create mode 100644 src/snowflake/snowpark/modin/core/__init__.py create mode 100644 src/snowflake/snowpark/modin/core/dataframe/__init__.py create mode 100644 src/snowflake/snowpark/modin/core/dataframe/algebra/__init__.py create mode 100644 src/snowflake/snowpark/modin/core/dataframe/algebra/default2pandas/__init__.py create mode 100644 src/snowflake/snowpark/modin/core/dataframe/algebra/default2pandas/binary.py create mode 100644 
src/snowflake/snowpark/modin/core/dataframe/algebra/default2pandas/cat.py create mode 100644 src/snowflake/snowpark/modin/core/dataframe/algebra/default2pandas/dataframe.py create mode 100644 src/snowflake/snowpark/modin/core/dataframe/algebra/default2pandas/datetime.py create mode 100644 src/snowflake/snowpark/modin/core/dataframe/algebra/default2pandas/default.py create mode 100644 src/snowflake/snowpark/modin/core/dataframe/algebra/default2pandas/groupby.py create mode 100644 src/snowflake/snowpark/modin/core/dataframe/algebra/default2pandas/resample.py create mode 100644 src/snowflake/snowpark/modin/core/dataframe/algebra/default2pandas/rolling.py create mode 100644 src/snowflake/snowpark/modin/core/dataframe/algebra/default2pandas/series.py create mode 100644 src/snowflake/snowpark/modin/core/dataframe/algebra/default2pandas/str.py create mode 100644 src/snowflake/snowpark/modin/core/execution/__init__.py create mode 100644 src/snowflake/snowpark/modin/core/execution/dispatching/__init__.py create mode 100644 src/snowflake/snowpark/modin/core/execution/dispatching/factories/__init__.py create mode 100644 src/snowflake/snowpark/modin/core/execution/dispatching/factories/baseio.py create mode 100644 src/snowflake/snowpark/modin/core/execution/dispatching/factories/dispatcher.py create mode 100644 src/snowflake/snowpark/modin/core/execution/dispatching/factories/factories.py create mode 100644 src/snowflake/snowpark/modin/core/execution/dispatching/factories/pandasframe.py create mode 100644 src/snowflake/snowpark/modin/pandas/__init__.py create mode 100644 src/snowflake/snowpark/modin/pandas/accessor.py create mode 100644 src/snowflake/snowpark/modin/pandas/api/__init__.py create mode 100644 src/snowflake/snowpark/modin/pandas/api/extensions/__init__.py create mode 100644 src/snowflake/snowpark/modin/pandas/api/extensions/extensions.py create mode 100644 src/snowflake/snowpark/modin/pandas/base.py create mode 100644 src/snowflake/snowpark/modin/pandas/dataframe.py create mode 100644 src/snowflake/snowpark/modin/pandas/general.py create mode 100644 src/snowflake/snowpark/modin/pandas/groupby.py create mode 100644 src/snowflake/snowpark/modin/pandas/indexing.py create mode 100644 src/snowflake/snowpark/modin/pandas/io.py create mode 100644 src/snowflake/snowpark/modin/pandas/iterator.py create mode 100644 src/snowflake/snowpark/modin/pandas/plotting.py create mode 100644 src/snowflake/snowpark/modin/pandas/resample.py create mode 100644 src/snowflake/snowpark/modin/pandas/series.py create mode 100644 src/snowflake/snowpark/modin/pandas/series_utils.py create mode 100644 src/snowflake/snowpark/modin/pandas/shared_docs.py create mode 100644 src/snowflake/snowpark/modin/pandas/snow_partition_iterator.py create mode 100644 src/snowflake/snowpark/modin/pandas/utils.py create mode 100644 src/snowflake/snowpark/modin/pandas/window.py create mode 100644 src/snowflake/snowpark/modin/plugin/PANDAS_CHANGELOG.md create mode 100644 src/snowflake/snowpark/modin/plugin/README.md create mode 100644 src/snowflake/snowpark/modin/plugin/__init__.py create mode 100644 src/snowflake/snowpark/modin/plugin/_internal/__init__.py create mode 100644 src/snowflake/snowpark/modin/plugin/_internal/aggregation_utils.py create mode 100644 src/snowflake/snowpark/modin/plugin/_internal/apply_utils.py create mode 100644 src/snowflake/snowpark/modin/plugin/_internal/binary_op_utils.py create mode 100644 src/snowflake/snowpark/modin/plugin/_internal/concat_utils.py create mode 100644 
src/snowflake/snowpark/modin/plugin/_internal/cumulative_utils.py create mode 100644 src/snowflake/snowpark/modin/plugin/_internal/cut_utils.py create mode 100644 src/snowflake/snowpark/modin/plugin/_internal/frame.py create mode 100644 src/snowflake/snowpark/modin/plugin/_internal/generator_utils.py create mode 100644 src/snowflake/snowpark/modin/plugin/_internal/groupby_utils.py create mode 100644 src/snowflake/snowpark/modin/plugin/_internal/indexing_utils.py create mode 100644 src/snowflake/snowpark/modin/plugin/_internal/io_utils.py create mode 100644 src/snowflake/snowpark/modin/plugin/_internal/isin_utils.py create mode 100644 src/snowflake/snowpark/modin/plugin/_internal/join_utils.py create mode 100644 src/snowflake/snowpark/modin/plugin/_internal/ordered_dataframe.py create mode 100644 src/snowflake/snowpark/modin/plugin/_internal/pivot_utils.py create mode 100644 src/snowflake/snowpark/modin/plugin/_internal/resample_utils.py create mode 100644 src/snowflake/snowpark/modin/plugin/_internal/session.py create mode 100644 src/snowflake/snowpark/modin/plugin/_internal/statement_params_constants.py create mode 100644 src/snowflake/snowpark/modin/plugin/_internal/telemetry.py create mode 100644 src/snowflake/snowpark/modin/plugin/_internal/timestamp_utils.py create mode 100644 src/snowflake/snowpark/modin/plugin/_internal/transpose_utils.py create mode 100644 src/snowflake/snowpark/modin/plugin/_internal/type_utils.py create mode 100644 src/snowflake/snowpark/modin/plugin/_internal/unpivot_utils.py create mode 100644 src/snowflake/snowpark/modin/plugin/_internal/utils.py create mode 100644 src/snowflake/snowpark/modin/plugin/_internal/where_utils.py create mode 100644 src/snowflake/snowpark/modin/plugin/_internal/window_utils.py create mode 100644 src/snowflake/snowpark/modin/plugin/_typing.py create mode 100644 src/snowflake/snowpark/modin/plugin/compiler/__init__.py create mode 100644 src/snowflake/snowpark/modin/plugin/compiler/doc_utils.py create mode 100644 src/snowflake/snowpark/modin/plugin/compiler/query_compiler.py create mode 100644 src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py create mode 100644 src/snowflake/snowpark/modin/plugin/default2pandas/__init__.py create mode 100644 src/snowflake/snowpark/modin/plugin/default2pandas/stored_procedure_utils.py create mode 100644 src/snowflake/snowpark/modin/plugin/dev-requirements.txt create mode 100644 src/snowflake/snowpark/modin/plugin/docstrings/__init__.py create mode 100644 src/snowflake/snowpark/modin/plugin/docstrings/base.py create mode 100644 src/snowflake/snowpark/modin/plugin/docstrings/dataframe.py create mode 100644 src/snowflake/snowpark/modin/plugin/docstrings/groupby.py create mode 100644 src/snowflake/snowpark/modin/plugin/docstrings/resample.py create mode 100644 src/snowflake/snowpark/modin/plugin/docstrings/series.py create mode 100644 src/snowflake/snowpark/modin/plugin/docstrings/window.py create mode 100644 src/snowflake/snowpark/modin/plugin/extensions/__init__.py create mode 100644 src/snowflake/snowpark/modin/plugin/extensions/dataframe_extensions.py create mode 100644 src/snowflake/snowpark/modin/plugin/extensions/dataframe_overrides.py create mode 100644 src/snowflake/snowpark/modin/plugin/extensions/pd_extensions.py create mode 100644 src/snowflake/snowpark/modin/plugin/extensions/pd_overrides.py create mode 100644 src/snowflake/snowpark/modin/plugin/extensions/series_extensions.py create mode 100644 src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py create 
mode 100644 src/snowflake/snowpark/modin/plugin/io/__init__.py create mode 100644 src/snowflake/snowpark/modin/plugin/io/snow_io.py create mode 100644 src/snowflake/snowpark/modin/plugin/utils/__init__.py create mode 100644 src/snowflake/snowpark/modin/plugin/utils/error_message.py create mode 100644 src/snowflake/snowpark/modin/plugin/utils/exceptions.py create mode 100644 src/snowflake/snowpark/modin/plugin/utils/numpy_to_pandas.py create mode 100644 src/snowflake/snowpark/modin/plugin/utils/warning_message.py create mode 100644 src/snowflake/snowpark/modin/utils.py create mode 100644 tests/integ/modin/__init__.py create mode 100644 tests/integ/modin/binary/__init__.py create mode 100644 tests/integ/modin/binary/test_binary_default2pandas.py create mode 100644 tests/integ/modin/binary/test_binary_op.py create mode 100644 tests/integ/modin/conftest.py create mode 100644 tests/integ/modin/data.py create mode 100644 tests/integ/modin/extensions/__init__.py create mode 100644 tests/integ/modin/extensions/test_dataframe_extensions.py create mode 100644 tests/integ/modin/extensions/test_series_extensions.py create mode 100644 tests/integ/modin/frame/__init__.py create mode 100644 tests/integ/modin/frame/conftest.py create mode 100644 tests/integ/modin/frame/test_add_prefix.py create mode 100644 tests/integ/modin/frame/test_add_suffix.py create mode 100644 tests/integ/modin/frame/test_aggregate.py create mode 100644 tests/integ/modin/frame/test_all_any.py create mode 100644 tests/integ/modin/frame/test_apply.py create mode 100644 tests/integ/modin/frame/test_applymap.py create mode 100644 tests/integ/modin/frame/test_astype.py create mode 100644 tests/integ/modin/frame/test_axis.py create mode 100644 tests/integ/modin/frame/test_copy.py create mode 100644 tests/integ/modin/frame/test_cumulative.py create mode 100644 tests/integ/modin/frame/test_describe.py create mode 100644 tests/integ/modin/frame/test_diff.py create mode 100644 tests/integ/modin/frame/test_drop.py create mode 100644 tests/integ/modin/frame/test_drop_duplicates.py create mode 100644 tests/integ/modin/frame/test_dropna.py create mode 100644 tests/integ/modin/frame/test_dtypes.py create mode 100644 tests/integ/modin/frame/test_duplicated.py create mode 100644 tests/integ/modin/frame/test_empty.py create mode 100644 tests/integ/modin/frame/test_ffill.py create mode 100644 tests/integ/modin/frame/test_fillna.py create mode 100644 tests/integ/modin/frame/test_filter.py create mode 100644 tests/integ/modin/frame/test_first_last_valid_index.py create mode 100644 tests/integ/modin/frame/test_getattr.py create mode 100644 tests/integ/modin/frame/test_getitem.py create mode 100644 tests/integ/modin/frame/test_head_tail.py create mode 100644 tests/integ/modin/frame/test_idxmax_idxmin.py create mode 100644 tests/integ/modin/frame/test_iloc.py create mode 100644 tests/integ/modin/frame/test_info.py create mode 100644 tests/integ/modin/frame/test_insert.py create mode 100644 tests/integ/modin/frame/test_isin.py create mode 100644 tests/integ/modin/frame/test_isna.py create mode 100644 tests/integ/modin/frame/test_iterrows.py create mode 100644 tests/integ/modin/frame/test_itertuples.py create mode 100644 tests/integ/modin/frame/test_join.py create mode 100644 tests/integ/modin/frame/test_len.py create mode 100644 tests/integ/modin/frame/test_loc.py create mode 100644 tests/integ/modin/frame/test_mask.py create mode 100644 tests/integ/modin/frame/test_melt.py create mode 100644 tests/integ/modin/frame/test_memory_usage.py create mode 100644 
tests/integ/modin/frame/test_merge.py create mode 100644 tests/integ/modin/frame/test_name.py create mode 100644 tests/integ/modin/frame/test_ndim.py create mode 100644 tests/integ/modin/frame/test_nunique.py create mode 100644 tests/integ/modin/frame/test_quantile.py create mode 100644 tests/integ/modin/frame/test_rank.py create mode 100644 tests/integ/modin/frame/test_rename.py create mode 100644 tests/integ/modin/frame/test_replace.py create mode 100644 tests/integ/modin/frame/test_repr.py create mode 100644 tests/integ/modin/frame/test_reset_index.py create mode 100644 tests/integ/modin/frame/test_round.py create mode 100644 tests/integ/modin/frame/test_sample.py create mode 100644 tests/integ/modin/frame/test_select_dtypes.py create mode 100644 tests/integ/modin/frame/test_set_index.py create mode 100644 tests/integ/modin/frame/test_setitem.py create mode 100644 tests/integ/modin/frame/test_shape.py create mode 100644 tests/integ/modin/frame/test_shift.py create mode 100644 tests/integ/modin/frame/test_size.py create mode 100644 tests/integ/modin/frame/test_skew.py create mode 100644 tests/integ/modin/frame/test_sort_index.py create mode 100644 tests/integ/modin/frame/test_sort_values.py create mode 100644 tests/integ/modin/frame/test_squeeze.py create mode 100644 tests/integ/modin/frame/test_take.py create mode 100644 tests/integ/modin/frame/test_to_snowflake.py create mode 100644 tests/integ/modin/frame/test_to_snowpark.py create mode 100644 tests/integ/modin/frame/test_transpose.py create mode 100644 tests/integ/modin/frame/test_unary_op.py create mode 100644 tests/integ/modin/frame/test_value_counts.py create mode 100644 tests/integ/modin/frame/test_where.py create mode 100644 tests/integ/modin/groupby/__init__.py create mode 100644 tests/integ/modin/groupby/conftest.py create mode 100644 tests/integ/modin/groupby/test_groupby_apply.py create mode 100644 tests/integ/modin/groupby/test_groupby_basic_agg.py create mode 100644 tests/integ/modin/groupby/test_groupby_dataframe_cumulative.py create mode 100644 tests/integ/modin/groupby/test_groupby_dataframe_rank.py create mode 100644 tests/integ/modin/groupby/test_groupby_dataframe_shift.py create mode 100644 tests/integ/modin/groupby/test_groupby_default2pandas.py create mode 100644 tests/integ/modin/groupby/test_groupby_head_tail.py create mode 100644 tests/integ/modin/groupby/test_groupby_idxmax_idxmin.py create mode 100644 tests/integ/modin/groupby/test_groupby_negative.py create mode 100644 tests/integ/modin/groupby/test_groupby_ngroups.py create mode 100644 tests/integ/modin/groupby/test_groupby_nunique.py create mode 100644 tests/integ/modin/groupby/test_groupby_property.py create mode 100644 tests/integ/modin/groupby/test_groupby_series.py create mode 100644 tests/integ/modin/groupby/test_groupby_series_cumulative.py create mode 100644 tests/integ/modin/groupby/test_groupby_series_rank.py create mode 100644 tests/integ/modin/groupby/test_groupby_series_shift.py create mode 100644 tests/integ/modin/groupby/test_groupby_transform.py create mode 100644 tests/integ/modin/groupby/test_grouping.py create mode 100644 tests/integ/modin/groupby/test_min_max.py create mode 100644 tests/integ/modin/groupby/test_quantile.py create mode 100644 tests/integ/modin/io/test_read_csv.py create mode 100644 tests/integ/modin/io/test_read_json.py create mode 100644 tests/integ/modin/io/test_read_parquet.py create mode 100644 tests/integ/modin/io/test_read_snowflake.py create mode 100644 tests/integ/modin/io/test_read_snowflake_query_call.py create 
mode 100644 tests/integ/modin/io/test_read_snowflake_query_cte.py create mode 100644 tests/integ/modin/io/test_read_snowflake_query_order_by.py create mode 100644 tests/integ/modin/io/test_read_snowflake_select_query.py create mode 100644 tests/integ/modin/io/test_to_dict.py create mode 100644 tests/integ/modin/io/test_to_pandas.py create mode 100644 tests/integ/modin/pandas_api_coverage.py create mode 100644 tests/integ/modin/pivot/__init__.py create mode 100644 tests/integ/modin/pivot/conftest.py create mode 100644 tests/integ/modin/pivot/pivot_utils.py create mode 100644 tests/integ/modin/pivot/test_pivot_dropna.py create mode 100644 tests/integ/modin/pivot/test_pivot_fill_value.py create mode 100644 tests/integ/modin/pivot/test_pivot_margins.py create mode 100644 tests/integ/modin/pivot/test_pivot_multiple.py create mode 100644 tests/integ/modin/pivot/test_pivot_negative.py create mode 100644 tests/integ/modin/pivot/test_pivot_single.py create mode 100644 tests/integ/modin/pivot/test_pivot_utils.py create mode 100644 tests/integ/modin/resample/__init__.py create mode 100644 tests/integ/modin/resample/test_resample.py create mode 100644 tests/integ/modin/resample/test_resample_negative.py create mode 100644 tests/integ/modin/series/__init__.py create mode 100644 tests/integ/modin/series/conftest.py create mode 100644 tests/integ/modin/series/test_add_prefix.py create mode 100644 tests/integ/modin/series/test_add_suffix.py create mode 100644 tests/integ/modin/series/test_aggregate.py create mode 100644 tests/integ/modin/series/test_all_any.py create mode 100644 tests/integ/modin/series/test_apply.py create mode 100644 tests/integ/modin/series/test_astype.py create mode 100644 tests/integ/modin/series/test_axis.py create mode 100644 tests/integ/modin/series/test_bitwise_operators.py create mode 100644 tests/integ/modin/series/test_convert_dtype.py create mode 100644 tests/integ/modin/series/test_copy.py create mode 100644 tests/integ/modin/series/test_cumulative.py create mode 100644 tests/integ/modin/series/test_describe.py create mode 100644 tests/integ/modin/series/test_diff.py create mode 100644 tests/integ/modin/series/test_drop_duplicates.py create mode 100644 tests/integ/modin/series/test_dropna.py create mode 100644 tests/integ/modin/series/test_dt_accessor.py create mode 100644 tests/integ/modin/series/test_dt_accessor_unsupported.py create mode 100644 tests/integ/modin/series/test_duplicated.py create mode 100644 tests/integ/modin/series/test_empty.py create mode 100644 tests/integ/modin/series/test_ffill.py create mode 100644 tests/integ/modin/series/test_fillna.py create mode 100644 tests/integ/modin/series/test_first_last_valid_index.py create mode 100644 tests/integ/modin/series/test_getattr.py create mode 100644 tests/integ/modin/series/test_getitem.py create mode 100644 tests/integ/modin/series/test_head_tail.py create mode 100644 tests/integ/modin/series/test_idxmax_idxmin.py create mode 100644 tests/integ/modin/series/test_iloc.py create mode 100644 tests/integ/modin/series/test_isin.py create mode 100644 tests/integ/modin/series/test_isna.py create mode 100644 tests/integ/modin/series/test_len.py create mode 100644 tests/integ/modin/series/test_loc.py create mode 100644 tests/integ/modin/series/test_mask.py create mode 100644 tests/integ/modin/series/test_name.py create mode 100644 tests/integ/modin/series/test_ndim.py create mode 100644 tests/integ/modin/series/test_nunique.py create mode 100644 tests/integ/modin/series/test_quantile.py create mode 100644 
tests/integ/modin/series/test_rank.py create mode 100644 tests/integ/modin/series/test_rename.py create mode 100644 tests/integ/modin/series/test_replace.py create mode 100644 tests/integ/modin/series/test_repr.py create mode 100644 tests/integ/modin/series/test_round.py create mode 100644 tests/integ/modin/series/test_sample.py create mode 100644 tests/integ/modin/series/test_setitem.py create mode 100644 tests/integ/modin/series/test_shape.py create mode 100644 tests/integ/modin/series/test_shift.py create mode 100644 tests/integ/modin/series/test_size.py create mode 100644 tests/integ/modin/series/test_sort_index.py create mode 100644 tests/integ/modin/series/test_sort_values.py create mode 100644 tests/integ/modin/series/test_squeeze.py create mode 100644 tests/integ/modin/series/test_str_accessor.py create mode 100644 tests/integ/modin/series/test_take.py create mode 100644 tests/integ/modin/series/test_to_snowflake.py create mode 100644 tests/integ/modin/series/test_to_snowpark.py create mode 100644 tests/integ/modin/series/test_transpose.py create mode 100644 tests/integ/modin/series/test_unary_op.py create mode 100644 tests/integ/modin/series/test_unique.py create mode 100644 tests/integ/modin/series/test_value_counts.py create mode 100644 tests/integ/modin/series/test_where.py create mode 100644 tests/integ/modin/sql_counter.py create mode 100644 tests/integ/modin/strings/__init__.py create mode 100644 tests/integ/modin/strings/test_case_justify.py create mode 100644 tests/integ/modin/strings/test_cat.py create mode 100644 tests/integ/modin/strings/test_extract.py create mode 100644 tests/integ/modin/strings/test_get_dummies.py create mode 100644 tests/integ/modin/strings/test_get_dummies_dataframe.py create mode 100644 tests/integ/modin/strings/test_get_dummies_series.py create mode 100644 tests/integ/modin/strings/test_strings.py create mode 100644 tests/integ/modin/test_classes.py create mode 100644 tests/integ/modin/test_concat.py create mode 100644 tests/integ/modin/test_cut.py create mode 100644 tests/integ/modin/test_default2pandas.py create mode 100644 tests/integ/modin/test_df_to_snowpark_pandas.py create mode 100644 tests/integ/modin/test_dtype_mapping.py create mode 100644 tests/integ/modin/test_from_pandas_to_pandas.py create mode 100644 tests/integ/modin/test_internal_frame.py create mode 100644 tests/integ/modin/test_merge.py create mode 100644 tests/integ/modin/test_numpy.py create mode 100644 tests/integ/modin/test_ordered_dataframe.py create mode 100644 tests/integ/modin/test_qcut.py create mode 100644 tests/integ/modin/test_session.py create mode 100644 tests/integ/modin/test_sql_counter.py create mode 100644 tests/integ/modin/test_telemetry.py create mode 100644 tests/integ/modin/test_to_numpy.py create mode 100644 tests/integ/modin/test_unique.py create mode 100644 tests/integ/modin/test_utils.py create mode 100644 tests/integ/modin/tools/__init__.py create mode 100644 tests/integ/modin/tools/test_date_range.py create mode 100644 tests/integ/modin/tools/test_to_datetime.py create mode 100644 tests/integ/modin/tools/test_to_numeric.py create mode 100644 tests/integ/modin/utils.py create mode 100644 tests/integ/modin/window/test_rolling.py create mode 100644 tests/resources/iris.csv create mode 100644 tests/unit/modin/__init__.py create mode 100644 tests/unit/modin/conftest.py create mode 100644 tests/unit/modin/default2pandas/__init__.py create mode 100644 tests/unit/modin/default2pandas/test_stored_procedure_utils.py create mode 100644 
tests/unit/modin/extensions/__init__.py create mode 100644 tests/unit/modin/extensions/test_pd_extensions.py create mode 100644 tests/unit/modin/modin/__init__.py create mode 100644 tests/unit/modin/modin/docs_module/__init__.py create mode 100644 tests/unit/modin/modin/docs_module/classes.py create mode 100644 tests/unit/modin/modin/docs_module/functions.py create mode 100644 tests/unit/modin/modin/docs_module_with_just_base/__init__.py create mode 100644 tests/unit/modin/modin/docs_module_with_just_base/classes.py create mode 100644 tests/unit/modin/modin/test_envvars.py create mode 100644 tests/unit/modin/modin/test_parameter.py create mode 100644 tests/unit/modin/test_aggregation_utils.py create mode 100644 tests/unit/modin/test_apply_utils.py create mode 100644 tests/unit/modin/test_binary_op_utils.py create mode 100644 tests/unit/modin/test_class.py create mode 100644 tests/unit/modin/test_docstrings.py create mode 100644 tests/unit/modin/test_frontend_utils.py create mode 100644 tests/unit/modin/test_groupby_unsupported.py create mode 100644 tests/unit/modin/test_groupby_utils.py create mode 100644 tests/unit/modin/test_internal_frame.py create mode 100644 tests/unit/modin/test_io.py create mode 100644 tests/unit/modin/test_ordered_dataframe.py create mode 100644 tests/unit/modin/test_pandas_module.py create mode 100644 tests/unit/modin/test_python_version.py create mode 100644 tests/unit/modin/test_series_cat.py create mode 100644 tests/unit/modin/test_series_dt.py create mode 100644 tests/unit/modin/test_series_strings.py create mode 100644 tests/unit/modin/test_snowflake_query_compiler.py create mode 100644 tests/unit/modin/test_telemetry.py create mode 100644 tests/unit/modin/test_test_utils.py create mode 100644 tests/unit/modin/test_unsupported.py create mode 100644 tests/unit/modin/test_utils.py diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 191e1fd72e5..8c4aff4faa9 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1 +1,4 @@ * @snowflakedb/snowpark-python-api-reviewers +/src/snowflake/snowpark/modin/ @snowflakedb/snowpandas +/tests/integ/modin/ @snowflakedb/snowpandas +/tests/unit/modin/ @snowflakedb/snowpandas diff --git a/.github/workflows/changedoc_snowpark_pandas.yml b/.github/workflows/changedoc_snowpark_pandas.yml new file mode 100644 index 00000000000..831232860de --- /dev/null +++ b/.github/workflows/changedoc_snowpark_pandas.yml @@ -0,0 +1,22 @@ +name: Snowpark pandas Changedoc Check + +on: + pull_request: + types: [opened, synchronize, labeled, unlabeled] + branches: + - pandas-main + paths: + - 'src/snowflake/snowpark/modin/**' + +jobs: + check_pandas_change_doc: + runs-on: ubuntu-latest + if: ${{!contains(github.event.pull_request.labels.*.name, 'NO-PANDAS-CHANGEDOC-UPDATES')}} + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Ensure Snowpark pandas docs is updated + run: git diff --name-only --diff-filter=ACMRT ${{ github.event.pull_request.base.sha }} ${{ github.sha }} | grep -q "docs/source/modin" diff --git a/.github/workflows/changelog_snowpark_pandas.yml b/.github/workflows/changelog_snowpark_pandas.yml new file mode 100644 index 00000000000..236796774cc --- /dev/null +++ b/.github/workflows/changelog_snowpark_pandas.yml @@ -0,0 +1,22 @@ +name: Snowpark pandas Changelog Check + +on: + pull_request: + types: [opened, synchronize, labeled, unlabeled] + branches: + - pandas-main + paths: + - 'src/snowflake/snowpark/modin/**' + +jobs: + check_pandas_change_log: + runs-on: ubuntu-latest + if: 
${{!contains(github.event.pull_request.labels.*.name, 'NO-PANDAS-CHANGELOG-UPDATES')}} + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Ensure PANDAS_CHANGELOG.md is updated + run: git diff --name-only --diff-filter=ACMRT ${{ github.event.pull_request.base.sha }} ${{ github.sha }} | grep -wq "src/snowflake/snowpark/modin/plugin/PANDAS_CHANGELOG.md" diff --git a/.github/workflows/precommit.yml b/.github/workflows/precommit.yml index 6d77c8234ff..6c0b02f8fe2 100644 --- a/.github/workflows/precommit.yml +++ b/.github/workflows/precommit.yml @@ -302,6 +302,115 @@ jobs: .tox/.coverage .tox/coverage.xml + test-snowpark-pandas: + name: Test modin-${{ matrix.os }}-${{ matrix.python-version }}-${{ matrix.cloud-provider }} + needs: build + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [macos-latest, windows-latest-64-cores, ubuntu-latest-64-cores] + python-version: [ "3.9", "3.10", "3.11" ] + cloud-provider: [aws, azure, gcp] + exclude: + # only run macos with aws py3.9 for doctest + - os: macos-latest + python-version: "3.10" + - os: macos-latest + python-version: "3.11" + - os: macos-latest + python-version: "3.9" + cloud-provider: azure + - os: macos-latest + python-version: "3.9" + cloud-provider: gcp + # only run ubuntu with py3.9 on aws and py3.10 on azure + - os: ubuntu-latest-64-cores + python-version: "3.11" + - os: ubuntu-latest-64-cores + python-version: "3.9" + cloud-provider: azure + - os: ubuntu-latest-64-cores + python-version: "3.9" + cloud-provider: gcp + - os: ubuntu-latest-64-cores + python-version: "3.10" + cloud-provider: aws + - os: ubuntu-latest-64-cores + python-version: "3.10" + cloud-provider: gcp + # only run windows with py3.10 on gcp + - os: windows-latest-64-cores + python-version: "3.9" + - os: windows-latest-64-cores + python-version: "3.10" + - os: windows-latest-64-cores + python-version: "3.11" + cloud-provider: aws + - os: windows-latest-64-cores + python-version: "3.11" + cloud-provider: azure + steps: + - name: Checkout Code + uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Display Python version + run: python -c "import sys; print(sys.version)" + - name: Decrypt parameters.py + shell: bash + run: .github/scripts/decrypt_parameters.sh + env: + PARAMETER_PASSWORD: ${{ secrets.PARAMETER_PASSWORD }} + CLOUD_PROVIDER: ${{ matrix.cloud-provider }} + - name: Download wheel(s) + uses: actions/download-artifact@v4 + with: + name: wheel + path: dist + - name: Show wheels downloaded + run: ls -lh dist + shell: bash + - name: Upgrade setuptools, pip and wheel + run: python -m pip install -U setuptools pip wheel + - name: Install tox + run: python -m pip install tox + # only run doctest for macos on aws + - if: ${{ matrix.os == 'macos-latest' && matrix.cloud-provider == 'aws' }} + name: Run Snowpark pandas API doctests + run: python -m tox -e "py${PYTHON_VERSION}-doctest-snowparkpandasdoctest-modin-ci" + env: + PYTHON_VERSION: ${{ matrix.python-version }} + cloud_provider: ${{ matrix.cloud-provider }} + PYTEST_ADDOPTS: --color=yes --tb=short + TOX_PARALLEL_NO_SPINNER: 1 + # Specify SNOWFLAKE_IS_PYTHON_RUNTIME_TEST: 1 when adding >= python3.11 with no server-side support + # For example, see https://github.com/snowflakedb/snowpark-python/pull/681 + shell: bash + # do not run other tests for macos on aws + - if: ${{ !(matrix.os == 'macos-latest' && matrix.cloud-provider == 'aws') }} + name: Run Snowpark 
pandas API tests (excluding doctests) + run: python -m tox -e "py${PYTHON_VERSION/\./}-snowparkpandasnotdoctest-modin-ci" + env: + PYTHON_VERSION: ${{ matrix.python-version }} + cloud_provider: ${{ matrix.cloud-provider }} + PYTEST_ADDOPTS: --color=yes --tb=short + TOX_PARALLEL_NO_SPINNER: 1 + shell: bash + - name: Combine coverages + run: python -m tox -e coverage --skip-missing-interpreters false + shell: bash + env: + SNOWFLAKE_IS_PYTHON_RUNTIME_TEST: 1 + - uses: actions/upload-artifact@v4 + with: + name: coverage_${{ matrix.os }}-${{ matrix.python-version }}-${{ matrix.cloud-provider }}-snowpark-pandas-testing + path: | + .tox/.coverage + .tox/coverage.xml + combine-coverage: if: ${{ success() || failure() }} name: Combine coverage @@ -365,7 +474,7 @@ jobs: - name: Upgrade setuptools and pip run: python -m pip install -U setuptools pip - name: Install Snowpark - run: python -m pip install ".[development, pandas]" + run: python -m pip install ".[modin-development, development, pandas]" - name: Install Sphinx run: python -m pip install sphinx - name: Build document diff --git a/LICENSE.txt b/LICENSE.txt index f024f6cad12..5995cf32a72 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -187,7 +187,7 @@ same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright (c) 2012-2023 Snowflake Computing, Inc. + Copyright (c) 2012-2024 Snowflake Computing, Inc. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/ci/check_standalone_function_snowpark_pandas_telemetry_decorator.py b/ci/check_standalone_function_snowpark_pandas_telemetry_decorator.py new file mode 100644 index 00000000000..32b5da09168 --- /dev/null +++ b/ci/check_standalone_function_snowpark_pandas_telemetry_decorator.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python3 +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# +import ast + + +class DecoratorError(Exception): + pass + + +def check_standalone_function_snowpark_pandas_telemetry_decorator( + target_file: str, + telemetry_decorator_name: str, +) -> None: + """ + Check if all standalone functions in the target file have been decorated by the decorator with + name telemetry_decorator_name. + Raises a DecoratorError if the decorator is missing. + + Args: + target_file (str): Path to the target file. + telemetry_decorator_name: Name of the telemetry decorator that is checked. + """ + # Get the source code of the target file + with open(target_file) as file: + source_code = file.read() + assert source_code.strip(), f"Source code in '{target_file}' is empty." + # Parse the abstract syntax tree + tree = ast.parse(source_code) + + # List of str: function names that need the decorator. 
+ failed_funcs = [] + + # Check that each public top-level function carries the telemetry decorator + # iter_child_nodes yields only direct child nodes, so nested functions are excluded + for node in ast.iter_child_nodes(tree): + if ( + isinstance(node, ast.FunctionDef) # Check if it is a function definition + and not node.name.startswith( + "_" + ) # the function is not private (does not start with an underscore) + and node.name + ): + has_telemetry_decorator = False + for decorator in node.decorator_list: + if ( + hasattr(decorator, "id") + and decorator.id == telemetry_decorator_name + ): + has_telemetry_decorator = True + break + if not has_telemetry_decorator: + failed_funcs.append(node.name) + if len(failed_funcs) > 0: + raise DecoratorError( + f"functions {failed_funcs} should be decorated with {telemetry_decorator_name}" + ) + + +if __name__ == "__main__": + check_standalone_function_snowpark_pandas_telemetry_decorator( + target_file="src/snowflake/snowpark/modin/pandas/io.py", + telemetry_decorator_name="snowpark_pandas_telemetry_standalone_function_decorator", + ) + check_standalone_function_snowpark_pandas_telemetry_decorator( + target_file="src/snowflake/snowpark/modin/pandas/general.py", + telemetry_decorator_name="snowpark_pandas_telemetry_standalone_function_decorator", + ) + check_standalone_function_snowpark_pandas_telemetry_decorator( + target_file="src/snowflake/snowpark/modin/plugin/extensions/pd_extensions.py", + telemetry_decorator_name="snowpark_pandas_telemetry_standalone_function_decorator", + ) + check_standalone_function_snowpark_pandas_telemetry_decorator( + target_file="src/snowflake/snowpark/modin/plugin/extensions/pd_overrides.py", + telemetry_decorator_name="snowpark_pandas_telemetry_standalone_function_decorator", + ) diff --git a/setup.py b/setup.py index a400f84aec4..454a0ae92be 100644 --- a/setup.py +++ b/setup.py @@ -10,6 +10,9 @@ THIS_DIR = os.path.dirname(os.path.realpath(__file__)) SRC_DIR = os.path.join(THIS_DIR, "src") SNOWPARK_SRC_DIR = os.path.join(SRC_DIR, "snowflake", "snowpark") +MODIN_DEPENDENCY_VERSION = ( + "==0.28.1" # Snowpark pandas requires modin 0.28.1, which depends on pandas 2.2.1 +) CONNECTOR_DEPENDENCY_VERSION = ">=3.6.0, <4.0.0" INSTALL_REQ_LIST = [ "setuptools>=40.6.0", @@ -65,6 +68,22 @@ "snowflake.snowpark._internal", "snowflake.snowpark._internal.analyzer", "snowflake.snowpark.mock", + "snowflake.snowpark.modin", + "snowflake.snowpark.modin.config", + "snowflake.snowpark.modin.core.dataframe.algebra.default2pandas", + "snowflake.snowpark.modin.core.execution.dispatching", + "snowflake.snowpark.modin.core.execution.dispatching.factories", + "snowflake.snowpark.modin.pandas", + "snowflake.snowpark.modin.pandas.api.extensions", + "snowflake.snowpark.modin.plugin", + "snowflake.snowpark.modin.plugin._internal", + "snowflake.snowpark.modin.plugin.compiler", + "snowflake.snowpark.modin.plugin.docstrings", + "snowflake.snowpark.modin.plugin.default2pandas", + "snowflake.snowpark.modin.plugin.docstrings", + "snowflake.snowpark.modin.plugin.extensions", + "snowflake.snowpark.modin.plugin.io", + "snowflake.snowpark.modin.plugin.utils", ], package_dir={ "": "src", @@ -76,6 +95,9 @@ "pandas": [ f"snowflake-connector-python[pandas]{CONNECTOR_DEPENDENCY_VERSION}", ], + "modin": [ + f"modin{MODIN_DEPENDENCY_VERSION}", + ], "secure-local-storage": [ f"snowflake-connector-python[secure-local-storage]{CONNECTOR_DEPENDENCY_VERSION}", ], @@ -88,6 +110,13 @@ "pytest-timeout", "pre-commit", ], + "modin-development": [ + "pytest-assume", # Snowpark pandas + "decorator", #
Snowpark pandas + "scipy", # Snowpark pandas 3rd party library testing + "statsmodels", # Snowpark pandas 3rd party library testing + f"modin{MODIN_DEPENDENCY_VERSION}", + ], "localtest": [ "pandas", "pyarrow", diff --git a/src/snowflake/snowpark/_internal/open_telemetry.py b/src/snowflake/snowpark/_internal/open_telemetry.py index 20b1b81858c..fbe0e8cb03b 100644 --- a/src/snowflake/snowpark/_internal/open_telemetry.py +++ b/src/snowflake/snowpark/_internal/open_telemetry.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2012-2023 Snowflake Computing Inc. All rights reserved. +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. # # diff --git a/src/snowflake/snowpark/dataframe.py b/src/snowflake/snowpark/dataframe.py index 8caa7c4d178..5bc44a57ddd 100644 --- a/src/snowflake/snowpark/dataframe.py +++ b/src/snowflake/snowpark/dataframe.py @@ -927,6 +927,96 @@ def to_df(self, *names: Union[str, Iterable[str]]) -> "DataFrame": new_cols.append(Column(attr).alias(name)) return self.select(new_cols) + @df_collect_api_telemetry + def to_snowpark_pandas( + self, + index_col: Optional[Union[str, List[str]]] = None, + columns: Optional[List[str]] = None, + ) -> "snowflake.snowpark.modin.pandas.DataFrame": + """ + Convert the Snowpark DataFrame to a Snowpark pandas DataFrame. + + Args: + index_col: A column name or a list of column names to use as the index. + columns: A list of column names for the columns to select from the Snowpark DataFrame. If not specified, select + all columns except the ones configured in index_col. + + Returns: + :class:`~snowflake.snowpark.modin.pandas.DataFrame` + A Snowpark pandas DataFrame containing index and data columns based on a snapshot of the current + Snowpark DataFrame, which triggers an eager evaluation. + + If index_col is provided, the specified index_col is selected as the index column(s) for the result dataframe, + otherwise, a default range index from 0 to n - 1 is created as the index column, where n is the number + of rows. Please note that it is also used as the starting row ordering for the dataframe, but there is no + guarantee that the default row ordering is the same for two Snowpark pandas dataframes created from + the same Snowpark DataFrame. + + If columns are provided, the specified columns are selected as the data column(s) for the result dataframe, + otherwise, all Snowpark DataFrame columns (excluding index_col) are selected as data columns. + + Note: + Transformations performed on the returned Snowpark pandas DataFrame do not affect the Snowpark DataFrame + from which it was created. Call + - :func:`snowflake.snowpark.modin.pandas.to_snowpark ` + to transform a Snowpark pandas DataFrame back to a Snowpark DataFrame. + + The column names used for columns or index_cols must be Normalized Snowflake Identifiers, and the + Normalized Snowflake Identifiers of a Snowpark DataFrame can be displayed by calling df.show(). + For details about Normalized Snowflake Identifiers, please refer to the Note in :func:`~snowflake.snowpark.modin.pandas.read_snowflake`. + + `to_snowpark_pandas` works only when the environment is set up correctly for Snowpark pandas. This environment + may require a version of Python and pandas different from what Snowpark Python uses. If the environment is set up + incorrectly, an error will be raised when `to_snowpark_pandas` is called.
+ + For Python version support information, please refer to: + - the prerequisites section https://docs.snowflake.com/LIMITEDACCESS/snowpark-pandas#prerequisites + - the installation section https://docs.snowflake.com/LIMITEDACCESS/snowpark-pandas#installing-the-snowpark-pandas-api + + See also: + - :func:`snowflake.snowpark.modin.pandas.to_snowpark ` + - :func:`snowflake.snowpark.modin.pandas.DataFrame.to_snowpark ` + - :func:`snowflake.snowpark.modin.pandas.Series.to_snowpark ` + + Example:: + >>> df = session.create_dataframe([[1, 2, 3]], schema=["a", "b", "c"]) + >>> snowpark_pandas_df = df.to_snowpark_pandas() # doctest: +SKIP + >>> snowpark_pandas_df # doctest: +SKIP +NORMALIZE_WHITESPACE + A B C + 0 1 2 3 + + >>> snowpark_pandas_df = df.to_snowpark_pandas(index_col='A') # doctest: +SKIP + >>> snowpark_pandas_df # doctest: +SKIP +NORMALIZE_WHITESPACE + B C + A + 1 2 3 + >>> snowpark_pandas_df = df.to_snowpark_pandas(index_col='A', columns=['B']) # doctest: +SKIP + >>> snowpark_pandas_df # doctest: +SKIP +NORMALIZE_WHITESPACE + B + A + 1 2 + >>> snowpark_pandas_df = df.to_snowpark_pandas(index_col=['B', 'A'], columns=['A', 'C', 'A']) # doctest: +SKIP + >>> snowpark_pandas_df # doctest: +SKIP +NORMALIZE_WHITESPACE + A C A + B A + 2 1 1 3 1 + """ + import snowflake.snowpark.modin.pandas as pd # pragma: no cover + + # create a temporary table out of the current snowpark dataframe + temporary_table_name = random_name_for_temp_object( + TempObjectType.TABLE + ) # pragma: no cover + self.write.save_as_table( + temporary_table_name, mode="errorifexists", table_type="temporary" + ) # pragma: no cover + + snowpandas_df = pd.read_snowflake( + name_or_query=temporary_table_name, index_col=index_col, columns=columns + ) # pragma: no cover + + return snowpandas_df + def __getitem__(self, item: Union[str, Column, List, Tuple, int]): if isinstance(item, str): return self.col(item) diff --git a/src/snowflake/snowpark/modin/NOTICE b/src/snowflake/snowpark/modin/NOTICE new file mode 100644 index 00000000000..b2a3d24db71 --- /dev/null +++ b/src/snowflake/snowpark/modin/NOTICE @@ -0,0 +1,249 @@ +Use of the Snowflake Snowpark pandas library is governed by your customer agreement with Snowflake +as Client Software. The open source libraries used in the Snowflake Snowpark pandas library include: + +#### [Modin (Apache 2)](https://github.com/modin-project/modin/) + +``` +Modin + +Copyright (c) 2018-2024 Modin Developers. + + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +``` + +#### [pandas (BSD 3-Clause License)](https://github.com/pandas-dev/pandas) + +``` +BSD 3-Clause License + +Copyright (c) 2008-2011, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team +All rights reserved. + +Copyright (c) 2011-2024, Open source contributors. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +``` diff --git a/src/snowflake/snowpark/modin/__init__.py b/src/snowflake/snowpark/modin/__init__.py new file mode 100644 index 00000000000..0fbef920926 --- /dev/null +++ b/src/snowflake/snowpark/modin/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# diff --git a/src/snowflake/snowpark/modin/config/__init__.py b/src/snowflake/snowpark/modin/config/__init__.py new file mode 100644 index 00000000000..16275bef184 --- /dev/null +++ b/src/snowflake/snowpark/modin/config/__init__.py @@ -0,0 +1,119 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. 
+ +"""Module houses config entities which can be used for Modin behavior tuning.""" + +from snowflake.snowpark.modin.config.envvars import ( + AsvDataSizeConfig, + AsvImplementation, + AsyncReadMode, + BenchmarkMode, + CIAWSAccessKeyID, + CIAWSSecretAccessKey, + CpuCount, + DaskThreadsPerWorker, + DocModule, + DoUseCalcite, + Engine, + EnvironmentVariable, + ExperimentalGroupbyImpl, + ExperimentalNumPyAPI, + GithubCI, + GpuCount, + HdkFragmentSize, + HdkLaunchParameters, + IsDebug, + IsExperimental, + IsRayCluster, + LazyExecution, + LogFileSize, + LogMemoryInterval, + LogMode, + Memory, + MinPartitionSize, + ModinNumpy, + NPartitions, + PersistentPickle, + ProgressBar, + RangePartitioning, + RangePartitioningGroupby, + RayRedisAddress, + RayRedisPassword, + ReadSqlEngine, + StorageFormat, + TestDatasetSize, + TestReadFromPostgres, + TestReadFromSqlServer, + TrackFileLeaks, +) +from snowflake.snowpark.modin.config.pubsub import Parameter, ValueSource + +__all__ = [ + "EnvironmentVariable", + "Parameter", + "ValueSource", + # General settings + "IsDebug", + "Engine", + "StorageFormat", + "CpuCount", + "GpuCount", + "Memory", + # Ray specific + "IsRayCluster", + "RayRedisAddress", + "RayRedisPassword", + "LazyExecution", + # Dask specific + "DaskThreadsPerWorker", + # Partitioning + "NPartitions", + "MinPartitionSize", + # HDK specific + "HdkFragmentSize", + "DoUseCalcite", + "HdkLaunchParameters", + # ASV specific + "TestDatasetSize", + "AsvImplementation", + "AsvDataSizeConfig", + # Specific features + "ProgressBar", + "BenchmarkMode", + "PersistentPickle", + "ModinNumpy", + "ExperimentalNumPyAPI", + "RangePartitioningGroupby", + "RangePartitioning", + "ExperimentalGroupbyImpl", + "AsyncReadMode", + "ReadSqlEngine", + "IsExperimental", + # For tests + "TrackFileLeaks", + "TestReadFromSqlServer", + "TestReadFromPostgres", + "GithubCI", + "CIAWSSecretAccessKey", + "CIAWSAccessKeyID", + # Logging + "LogMode", + "LogMemoryInterval", + "LogFileSize", + # Plugin settings + "DocModule", +] diff --git a/src/snowflake/snowpark/modin/config/__main__.py b/src/snowflake/snowpark/modin/config/__main__.py new file mode 100644 index 00000000000..db55f40c548 --- /dev/null +++ b/src/snowflake/snowpark/modin/config/__main__.py @@ -0,0 +1,108 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +""" +Content of this file should be executed if module `modin.config` is called. + +If module is called (using `python -m modin.config`) configs help will be printed. +Using `-export_path` option configs description can be exported to the external CSV file +provided with this flag. 
+""" # pragma: no cover + +import argparse # pragma: no cover +from textwrap import dedent # pragma: no cover + +import pandas # pragma: no cover + +import snowflake.snowpark.modin.config as cfg # pragma: no cover + + +def print_config_help() -> None: # pragma: no cover + """Print configs help messages.""" + for objname in sorted(cfg.__all__): + obj = getattr(cfg, objname) + if ( + isinstance(obj, type) + and issubclass(obj, cfg.Parameter) + and not obj.is_abstract + ): + print(f"{obj.get_help()}\n\tCurrent value: {obj.get()}") # noqa: T201 + + +def export_config_help(filename: str) -> None: # pragma: no cover + """ + Export all configs help messages to the CSV file. + + Parameters + ---------- + filename : str + Name of the file to export configs data. + """ + configs_data = [] + default_values = dict( + RayRedisPassword="random string", + CpuCount="multiprocessing.cpu_count()", + NPartitions="equals to MODIN_CPUS env", + ) + for objname in sorted(cfg.__all__): + obj = getattr(cfg, objname) + if ( + isinstance(obj, type) + and issubclass(obj, cfg.Parameter) + and not obj.is_abstract + ): + data = { + "Config Name": obj.__name__, + "Env. Variable Name": getattr( + obj, "varname", "not backed by environment" + ), + "Default Value": default_values.get(obj.__name__, obj._get_default()), + # `Notes` `-` underlining can't be correctly parsed inside csv table by sphinx + "Description": dedent(obj.__doc__ or "").replace( + "Notes\n-----", "Notes:\n" + ), + "Options": obj.choices, + } + configs_data.append(data) + + pandas.DataFrame( + configs_data, + columns=[ + "Config Name", + "Env. Variable Name", + "Default Value", + "Description", + "Options", + ], + ).to_csv(filename, index=False) + + +if __name__ == "__main__": # pragma: no cover + parser = argparse.ArgumentParser() + parser.add_argument( + "--export-path", + dest="export_path", + type=str, + required=False, + default=None, + help="File path to export configs data.", + ) + export_path = parser.parse_args().export_path + if export_path: + export_config_help(export_path) + else: + print_config_help() diff --git a/src/snowflake/snowpark/modin/config/envvars.py b/src/snowflake/snowpark/modin/config/envvars.py new file mode 100644 index 00000000000..63bea0b0f28 --- /dev/null +++ b/src/snowflake/snowpark/modin/config/envvars.py @@ -0,0 +1,913 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. 
+ +"""Module houses Modin configs originated from environment variables.""" + +import os +import secrets +import sys +import warnings +from textwrap import dedent +from typing import Any, Optional + +from packaging import version +from pandas.util._decorators import doc # type: ignore[attr-defined] + +from snowflake.snowpark.modin.config.pubsub import ( + _TYPE_PARAMS, + _UNSET, + DeprecationDescriptor, + ExactStr, + Parameter, + ValueSource, +) + + +class EnvironmentVariable(Parameter, type=str, abstract=True): # pragma: no cover + """Base class for environment variables-based configuration.""" + + varname: Optional[str] = None + + @classmethod + def _get_raw_from_config(cls) -> str: + """ + Read the value from environment variable. + + Returns + ------- + str + Config raw value. + + Raises + ------ + TypeError + If `varname` is None. + KeyError + If value is absent. + """ + if cls.varname is None: + raise TypeError("varname should not be None") + return os.environ[cls.varname] + + @classmethod + def get_help(cls) -> str: + """ + Generate user-presentable help for the config. + + Returns + ------- + str + """ + help = f"{cls.varname}: {dedent(cls.__doc__ or 'Unknown').strip()}\n\tProvide {_TYPE_PARAMS[cls.type].help}" + if cls.choices: + help += f" (valid examples are: {', '.join(str(c) for c in cls.choices)})" + return help + + +class EnvWithSibilings( + EnvironmentVariable, + # 'type' is a mandatory parameter for '__init_subclasses__', so we have to pass something here, + # this doesn't force child classes to have 'str' type though, they actually can be any type + type=str, +): # pragma: no cover + """Ensure values synchronization between sibling parameters.""" + + _update_sibling = True + + @classmethod + def _sibling(cls) -> type["EnvWithSibilings"]: + """Return a sibling parameter.""" + raise NotImplementedError() + + @classmethod + def get(cls) -> Any: + """ + Get parameter's value and ensure that it's equal to the sibling's value. + + Returns + ------- + Any + """ + sibling = cls._sibling() + + if sibling._value is _UNSET and cls._value is _UNSET: + super().get() + with warnings.catch_warnings(): + # filter warnings that can potentially come from the potentially deprecated sibling + warnings.filterwarnings("ignore", category=FutureWarning) + super(EnvWithSibilings, sibling).get() + + if ( + cls._value_source + == sibling._value_source + == ValueSource.GOT_FROM_CFG_SOURCE + ): + raise ValueError( + f"Configuration is ambiguous. You cannot set '{cls.varname}' and '{sibling.varname}' at the same time." 
+ ) + + # further we assume that there are only two valid sources for the variables: 'GOT_FROM_CFG' and 'DEFAULT', + # as otherwise we wouldn't ended-up in this branch at all, because all other ways of setting a value + # changes the '._value' attribute from '_UNSET' to something meaningful + from snowflake.snowpark.modin.plugin.utils.error_message import ErrorMessage + + if cls._value_source == ValueSource.GOT_FROM_CFG_SOURCE: + ErrorMessage.catch_bugs_and_request_email( + failure_condition=sibling._value_source != ValueSource.DEFAULT + ) + sibling._value = cls._value + sibling._value_source = ValueSource.GOT_FROM_CFG_SOURCE + elif sibling._value_source == ValueSource.GOT_FROM_CFG_SOURCE: + ErrorMessage.catch_bugs_and_request_email( + failure_condition=cls._value_source != ValueSource.DEFAULT + ) + cls._value = sibling._value + cls._value_source = ValueSource.GOT_FROM_CFG_SOURCE + else: + ErrorMessage.catch_bugs_and_request_email( + failure_condition=cls._value_source != ValueSource.DEFAULT + or sibling._value_source != ValueSource.DEFAULT + ) + # propagating 'cls' default value to the sibling + sibling._value = cls._value + return super().get() + + @classmethod + def put(cls, value: Any) -> None: + """ + Set a new value to this parameter as well as to its sibling. + + Parameters + ---------- + value : Any + """ + super().put(value) + # avoid getting into an infinite recursion + if cls._update_sibling: + cls._update_sibling = False + try: + with warnings.catch_warnings(): + # filter potential future warnings of the sibling + warnings.filterwarnings("ignore", category=FutureWarning) + cls._sibling().put(value) + finally: + cls._update_sibling = True + + +class IsDebug(EnvironmentVariable, type=bool): # pragma: no cover + """Force Modin engine to be "Python" unless specified by $MODIN_ENGINE.""" + + varname = "MODIN_DEBUG" + + +class Engine(EnvironmentVariable, type=str): # pragma: no cover + """Distribution engine to run queries by.""" + + varname = "MODIN_ENGINE" + choices = ("Ray", "Dask", "Python", "Native", "Unidist") + + NOINIT_ENGINES = { + "Python", + } # engines that don't require initialization, useful for unit tests + + has_custom_engine = False + + @classmethod + def _get_default(cls) -> str: + """ + Get default value of the config. + + Returns + ------- + str + """ + from snowflake.snowpark.modin.utils import ( + MIN_DASK_VERSION, + MIN_RAY_VERSION, + MIN_UNIDIST_VERSION, + ) + + # If there's a custom engine, we don't need to check for any engine + # dependencies. Return the default "Python" engine. + if IsDebug.get() or cls.has_custom_engine: + return "Python" + try: + import ray + + except ImportError: + pass + else: + if version.parse(ray.__version__) < MIN_RAY_VERSION: + raise ImportError( + 'Please `pip install "modin[ray]"` to install compatible Ray ' + + "version " + + f"(>={MIN_RAY_VERSION})." + ) + return "Ray" + try: + import dask + import distributed + + except ImportError: + pass + else: + if ( + version.parse(dask.__version__) < MIN_DASK_VERSION + or version.parse(distributed.__version__) < MIN_DASK_VERSION + ): + raise ImportError( + f'Please `pip install "modin[dask]"` to install compatible Dask version (>={MIN_DASK_VERSION}).' + ) + return "Dask" + try: + # We import ``DbWorker`` from this module since correct import of ``DbWorker`` itself + # from HDK is located in it with all the necessary options for dlopen. 
+ from modin.experimental.core.execution.native.implementations.hdk_on_native.db_worker import ( # noqa + DbWorker, + ) + except ImportError: + pass + else: + return "Native" + try: + import unidist + + except ImportError: + pass + else: + if version.parse(unidist.__version__) < MIN_UNIDIST_VERSION: + raise ImportError( + 'Please `pip install "unidist[mpi]"` to install compatible unidist on MPI ' + + "version " + + f"(>={MIN_UNIDIST_VERSION})." + ) + return "Unidist" + raise ImportError( + "Please refer to installation documentation page to install an engine" + ) + + @classmethod + @doc(Parameter.add_option.__doc__) + def add_option(cls, choice: Any) -> Any: + choice = super().add_option(choice) + cls.NOINIT_ENGINES.add(choice) + cls.has_custom_engine = True + return choice + + +class StorageFormat(EnvironmentVariable, type=str): # pragma: no cover + """Engine to run on a single node of distribution.""" + + varname = "MODIN_STORAGE_FORMAT" + default = "Pandas" + choices = ("Pandas", "Hdk", "Cudf") + + +class IsExperimental(EnvironmentVariable, type=bool): # pragma: no cover + """Whether to Turn on experimental features.""" + + varname = "MODIN_EXPERIMENTAL" + + +class IsRayCluster(EnvironmentVariable, type=bool): # pragma: no cover + """Whether Modin is running on pre-initialized Ray cluster.""" + + varname = "MODIN_RAY_CLUSTER" + + +class RayRedisAddress(EnvironmentVariable, type=ExactStr): # pragma: no cover + """Redis address to connect to when running in Ray cluster.""" + + varname = "MODIN_REDIS_ADDRESS" + + +class RayRedisPassword(EnvironmentVariable, type=ExactStr): # pragma: no cover + """What password to use for connecting to Redis.""" + + varname = "MODIN_REDIS_PASSWORD" + default = secrets.token_hex(32) + + +class CpuCount(EnvironmentVariable, type=int): # pragma: no cover + """How many CPU cores to use during initialization of the Modin engine.""" + + varname = "MODIN_CPUS" + + @classmethod + def _get_default(cls) -> int: + """ + Get default value of the config. + + Returns + ------- + int + """ + import multiprocessing + + return multiprocessing.cpu_count() + + +class GpuCount(EnvironmentVariable, type=int): # pragma: no cover + """How may GPU devices to utilize across the whole distribution.""" + + varname = "MODIN_GPUS" + + +class Memory(EnvironmentVariable, type=int): # pragma: no cover + """ + How much memory (in bytes) give to an execution engine. + + Notes + ----- + * In Ray case: the amount of memory to start the Plasma object store with. + * In Dask case: the amount of memory that is given to each worker depending on CPUs used. + """ + + varname = "MODIN_MEMORY" + + +class NPartitions(EnvironmentVariable, type=int): # pragma: no cover + """How many partitions to use for a Modin DataFrame (along each axis).""" + + varname = "MODIN_NPARTITIONS" + + @classmethod + def _put(cls, value: int) -> None: + """ + Put specific value if NPartitions wasn't set by a user yet. + + Parameters + ---------- + value : int + Config value to set. + + Notes + ----- + This method is used to set NPartitions from cluster resources internally + and should not be called by a user. + """ + if cls.get_value_source() == ValueSource.DEFAULT: + cls.put(value) + + @classmethod + def _get_default(cls) -> int: + """ + Get default value of the config. 
+ + Returns + ------- + int + """ + if StorageFormat.get() == "Cudf": + return GpuCount.get() + else: + return CpuCount.get() + + +class HdkFragmentSize(EnvironmentVariable, type=int): # pragma: no cover + """How big a fragment in HDK should be when creating a table (in rows).""" + + varname = "MODIN_HDK_FRAGMENT_SIZE" + + +class DoUseCalcite(EnvironmentVariable, type=bool): # pragma: no cover + """Whether to use Calcite for HDK queries execution.""" + + varname = "MODIN_USE_CALCITE" + default = True + + +class TestDatasetSize(EnvironmentVariable, type=str): # pragma: no cover + """Dataset size for running some tests.""" + + varname = "MODIN_TEST_DATASET_SIZE" + choices = ("Small", "Normal", "Big") + + +class TrackFileLeaks(EnvironmentVariable, type=bool): # pragma: no cover + """Whether to track for open file handles leakage during testing.""" + + varname = "MODIN_TEST_TRACK_FILE_LEAKS" + # Turn off tracking on Windows by default because + # psutil's open_files() can be extremely slow on Windows (up to adding a few hours). + # see https://github.com/giampaolo/psutil/pull/597 + default = sys.platform != "win32" + + +class AsvImplementation(EnvironmentVariable, type=ExactStr): # pragma: no cover + """Allows to select a library that we will use for testing performance.""" + + varname = "MODIN_ASV_USE_IMPL" + choices = ("modin", "pandas") + + default = "modin" + + +class AsvDataSizeConfig(EnvironmentVariable, type=ExactStr): # pragma: no cover + """Allows to override default size of data (shapes).""" + + varname = "MODIN_ASV_DATASIZE_CONFIG" + default = None + + +class ProgressBar(EnvironmentVariable, type=bool): # pragma: no cover + """Whether or not to show the progress bar.""" + + varname = "MODIN_PROGRESS_BAR" + default = False + + @classmethod + def enable(cls) -> None: + """Enable ``ProgressBar`` feature.""" + cls.put(True) + + @classmethod + def disable(cls) -> None: + """Disable ``ProgressBar`` feature.""" + cls.put(False) + + @classmethod + def put(cls, value: bool) -> None: + """ + Set ``ProgressBar`` value only if synchronous benchmarking is disabled. + + Parameters + ---------- + value : bool + Config value to set. + """ + if value and BenchmarkMode.get(): + raise ValueError("ProgressBar isn't compatible with BenchmarkMode") + super().put(value) + + +class BenchmarkMode(EnvironmentVariable, type=bool): # pragma: no cover + """Whether or not to perform computations synchronously.""" + + varname = "MODIN_BENCHMARK_MODE" + default = False + + @classmethod + def put(cls, value: bool) -> None: + """ + Set ``BenchmarkMode`` value only if progress bar feature is disabled. + + Parameters + ---------- + value : bool + Config value to set. 
+ """ + if value and ProgressBar.get(): + raise ValueError("BenchmarkMode isn't compatible with ProgressBar") + super().put(value) + + +class LogMode(EnvironmentVariable, type=ExactStr): # pragma: no cover + """Set ``LogMode`` value if users want to opt-in.""" + + varname = "MODIN_LOG_MODE" + choices = ("enable", "disable", "enable_api_only") + default = "disable" + + @classmethod + def enable(cls) -> None: + """Enable all logging levels.""" + cls.put("enable") + + @classmethod + def disable(cls) -> None: + """Disable logging feature.""" + cls.put("disable") + + @classmethod + def enable_api_only(cls) -> None: + """Enable API level logging.""" + cls.put("enable_api_only") + + +class LogMemoryInterval(EnvironmentVariable, type=int): # pragma: no cover + """Interval (in seconds) to profile memory utilization for logging.""" + + varname = "MODIN_LOG_MEMORY_INTERVAL" + default = 5 + + @classmethod + def put(cls, value: int) -> None: + """ + Set ``LogMemoryInterval`` with extra checks. + + Parameters + ---------- + value : int + Config value to set. + """ + if value <= 0: + raise ValueError(f"Log memory Interval should be > 0, passed value {value}") + super().put(value) + + @classmethod + def get(cls) -> int: + """ + Get ``LogMemoryInterval`` with extra checks. + + Returns + ------- + int + """ + log_memory_interval = super().get() + assert log_memory_interval > 0, "`LogMemoryInterval` should be > 0" + return log_memory_interval + + +class LogFileSize(EnvironmentVariable, type=int): # pragma: no cover + """Max size of logs (in MBs) to store per Modin job.""" + + varname = "MODIN_LOG_FILE_SIZE" + default = 10 + + @classmethod + def put(cls, value: int) -> None: + """ + Set ``LogFileSize`` with extra checks. + + Parameters + ---------- + value : int + Config value to set. + """ + if value <= 0: + raise ValueError(f"Log file size should be > 0 MB, passed value {value}") + super().put(value) + + @classmethod + def get(cls) -> int: + """ + Get ``LogFileSize`` with extra checks. + + Returns + ------- + int + """ + log_file_size = super().get() + assert log_file_size > 0, "`LogFileSize` should be > 0" + return log_file_size + + +class PersistentPickle(EnvironmentVariable, type=bool): # pragma: no cover + """Whether serialization should be persistent.""" + + varname = "MODIN_PERSISTENT_PICKLE" + # When set to off, it allows faster serialization which is only + # valid in current run (i.e. useless for saving to disk). + # When set to on, Modin objects could be saved to disk and loaded + # but serialization/deserialization could take more time. + default = False + + +class HdkLaunchParameters(EnvironmentVariable, type=dict): # pragma: no cover + """ + Additional command line options for the HDK engine. + + Please visit OmniSci documentation for the description of available parameters: + https://docs.omnisci.com/installation-and-configuration/config-parameters#configuration-parameters-for-omniscidb + """ + + varname = "MODIN_HDK_LAUNCH_PARAMETERS" + + @classmethod + def get(cls) -> dict: + """ + Get the resulted command-line options. + + Decode and merge specified command-line options with the default one. + + Returns + ------- + dict + Decoded and verified config value. + """ + custom_parameters = super().get() + result = cls._get_default().copy() + result.update( + {key.replace("-", "_"): value for key, value in custom_parameters.items()} + ) + return result + + @classmethod + def _get_default(cls) -> Any: + """ + Get default value of the config. 
Checks the pyhdk version and omits variables unsupported in prior versions. + + Returns + ------- + dict + Config keys and corresponding values. + """ + if (default := getattr(cls, "default", None)) is None: + cls.default = default = { + "enable_union": 1, + "enable_columnar_output": 1, + "enable_lazy_fetch": 0, + "null_div_by_zero": 1, + "enable_watchdog": 0, + "enable_thrift_logs": 0, + "enable_multifrag_execution_result": 1, + "cpu_only": 1, + } + + try: + import pyhdk + + if version.parse(pyhdk.__version__) >= version.parse("0.6.1"): + default["enable_lazy_dict_materialization"] = 0 + default["log_dir"] = "pyhdk_log" + except ImportError: + # if pyhdk is not available, do not show any additional options + pass + return default + + +class MinPartitionSize(EnvironmentVariable, type=int): # pragma: no cover + """ + Minimum number of rows/columns in a single pandas partition split. + + Once a partition for a pandas dataframe has more than this many elements, + Modin adds another partition. + """ + + varname = "MODIN_MIN_PARTITION_SIZE" + default = 32 + + @classmethod + def put(cls, value: int) -> None: + """ + Set ``MinPartitionSize`` with extra checks. + + Parameters + ---------- + value : int + Config value to set. + """ + if value <= 0: + raise ValueError(f"Min partition size should be > 0, passed value {value}") + super().put(value) + + @classmethod + def get(cls) -> int: + """ + Get ``MinPartitionSize`` with extra checks. + + Returns + ------- + int + """ + min_partition_size = super().get() + assert min_partition_size > 0, "`min_partition_size` should be > 0" + return min_partition_size + + +class TestReadFromSqlServer(EnvironmentVariable, type=bool): # pragma: no cover + """Set to true to test reading from SQL server.""" + + varname = "MODIN_TEST_READ_FROM_SQL_SERVER" + default = False + + +class TestReadFromPostgres(EnvironmentVariable, type=bool): # pragma: no cover + """Set to true to test reading from Postgres.""" + + varname = "MODIN_TEST_READ_FROM_POSTGRES" + default = False + + +class GithubCI(EnvironmentVariable, type=bool): # pragma: no cover + """Set to true when running Modin in GitHub CI.""" + + varname = "MODIN_GITHUB_CI" + default = False + + +class ModinNumpy(EnvWithSibilings, type=bool): # pragma: no cover + """Set to true to use Modin's implementation of NumPy API.""" + + varname = "MODIN_NUMPY" + default = False + + @classmethod + def _sibling(cls) -> type[EnvWithSibilings]: + """Get a parameter sibling.""" + return ExperimentalNumPyAPI + + +class ExperimentalNumPyAPI(EnvWithSibilings, type=bool): # pragma: no cover + """ + Set to true to use Modin's implementation of NumPy API. + + This parameter is deprecated. Use ``ModinNumpy`` instead. + """ + + varname = "MODIN_EXPERIMENTAL_NUMPY_API" + default = False + + @classmethod + def _sibling(cls) -> type[EnvWithSibilings]: + """Get a parameter sibling.""" + return ModinNumpy + + +# Let the parameter's handling logic know that this variable is deprecated and that +# we should raise respective warnings +ExperimentalNumPyAPI._deprecation_descriptor = DeprecationDescriptor( + ExperimentalNumPyAPI, ModinNumpy +) + + +class RangePartitioningGroupby(EnvWithSibilings, type=bool): # pragma: no cover + """ + Set to true to use Modin's range-partitioning group by implementation. + + Experimental groupby is implemented using a range-partitioning technique, + note that it may not always work better than the original Modin's TreeReduce + and FullAxis implementations. 
For more information visit the according section + of Modin's documentation: TODO: add a link to the section once it's written. + """ + + varname = "MODIN_RANGE_PARTITIONING_GROUPBY" + default = False + + @classmethod + def _sibling(cls) -> type[EnvWithSibilings]: + """Get a parameter sibling.""" + return ExperimentalGroupbyImpl + + +class ExperimentalGroupbyImpl(EnvWithSibilings, type=bool): # pragma: no cover + """ + Set to true to use Modin's range-partitioning group by implementation. + + This parameter is deprecated. Use ``RangePartitioningGroupby`` instead. + """ + + varname = "MODIN_EXPERIMENTAL_GROUPBY" + default = False + + @classmethod + def _sibling(cls) -> type[EnvWithSibilings]: + """Get a parameter sibling.""" + return RangePartitioningGroupby + + +# Let the parameter's handling logic know that this variable is deprecated and that +# we should raise respective warnings +ExperimentalGroupbyImpl._deprecation_descriptor = DeprecationDescriptor( + ExperimentalGroupbyImpl, RangePartitioningGroupby +) + + +class RangePartitioning(EnvironmentVariable, type=bool): # pragma: no cover + """ + Set to true to use Modin's range-partitioning implementation where possible. + + Please refer to documentation for cases where enabling this options would be beneficial: + https://modin.readthedocs.io/en/stable/flow/modin/experimental/range_partitioning_groupby.html + """ + + varname = "MODIN_RANGE_PARTITIONING" + default = False + + +class CIAWSSecretAccessKey(EnvironmentVariable, type=str): # pragma: no cover + """Set to AWS_SECRET_ACCESS_KEY when running mock S3 tests for Modin in GitHub CI.""" + + varname = "AWS_SECRET_ACCESS_KEY" + default = "foobar_secret" + + +class CIAWSAccessKeyID(EnvironmentVariable, type=str): # pragma: no cover + """Set to AWS_ACCESS_KEY_ID when running mock S3 tests for Modin in GitHub CI.""" + + varname = "AWS_ACCESS_KEY_ID" + default = "foobar_key" + + +class AsyncReadMode(EnvironmentVariable, type=bool): # pragma: no cover + """ + It does not wait for the end of reading information from the source. + + It basically means, that the reading function only launches tasks for the dataframe + to be read/created, but not ensures that the construction is finalized by the time + the reading function returns a dataframe. + + This option was brought to improve performance of reading/construction + of Modin DataFrames, however it may also: + + 1. Increase the peak memory consumption. Since the garbage collection of the + temporary objects created during the reading is now also lazy and will only + be performed when the reading/construction is actually finished. + + 2. Can break situations when the source is manually deleted after the reading + function returns a result, for example, when reading inside of a context-block + that deletes the file on ``__exit__()``. + """ + + varname = "MODIN_ASYNC_READ_MODE" + default = False + + +class ReadSqlEngine(EnvironmentVariable, type=str): # pragma: no cover + """Engine to run `read_sql`.""" + + varname = "MODIN_READ_SQL_ENGINE" + default = "Pandas" + choices = ("Pandas", "Connectorx") + + +class LazyExecution(EnvironmentVariable, type=str): # pragma: no cover + """ + Lazy execution mode. + + Supported values: + `Auto` - the execution mode is chosen by the engine for each operation (default value). + `On` - the lazy execution is performed wherever it's possible. + `Off` - the lazy execution is disabled. 
+ """ + + varname = "MODIN_LAZY_EXECUTION" + choices = ("Auto", "On", "Off") + default = "Auto" + + +class DocModule(EnvironmentVariable, type=ExactStr): # pragma: no cover + """ + The module to use that will be used for docstrings. + + The value set here must be a valid, importable module. It should have + a `DataFrame`, `Series`, and/or several APIs directly (e.g. `read_csv`). + """ + + varname = "MODIN_DOC_MODULE" + default = "pandas" + + +class DaskThreadsPerWorker(EnvironmentVariable, type=int): # pragma: no cover + """Number of threads per Dask worker.""" + + varname = "MODIN_DASK_THREADS_PER_WORKER" + default = 1 + + +def _check_vars() -> None: # pragma: no cover + """ + Check validity of environment variables. + + Look out for any environment variables that start with "MODIN_" prefix + that are unknown - they might be a typo, so warn a user. + """ + valid_names = { + obj.varname + for obj in globals().values() + if isinstance(obj, type) + and issubclass(obj, EnvironmentVariable) + and not obj.is_abstract + } + valid_names.add("MODIN_PYTEST_CMD") + found_names = {name for name in os.environ if name.startswith("MODIN_")} + unknown = found_names - valid_names + deprecated: dict[str, DeprecationDescriptor] = { + obj.varname: obj._deprecation_descriptor + for obj in globals().values() + if isinstance(obj, type) + and issubclass(obj, EnvironmentVariable) + and not obj.is_abstract + and obj.varname is not None + and obj._deprecation_descriptor is not None + } + found_deprecated = found_names & deprecated.keys() + if unknown: + warnings.warn( # noqa: B028 + f"Found unknown environment variable{'s' if len(unknown) > 1 else ''}," + + f" please check {'their' if len(unknown) > 1 else 'its'} spelling: " + + ", ".join(sorted(unknown)) + ) + for depr_var in found_deprecated: + warnings.warn( # noqa: B028 + deprecated[depr_var].deprecation_message(use_envvar_names=True), + FutureWarning, + ) + + +_check_vars() diff --git a/src/snowflake/snowpark/modin/config/pubsub.py b/src/snowflake/snowpark/modin/config/pubsub.py new file mode 100644 index 00000000000..f83fe3647a8 --- /dev/null +++ b/src/snowflake/snowpark/modin/config/pubsub.py @@ -0,0 +1,447 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +"""Module houses ``Parameter`` class - base class for all configs.""" + +import warnings +from collections import defaultdict +from enum import IntEnum +from typing import TYPE_CHECKING, Any, Callable, DefaultDict, NamedTuple, Optional, cast + +if TYPE_CHECKING: # pragma: no cover + from snowflake.snowpark.modin.config.envvars import EnvironmentVariable + + +class DeprecationDescriptor: # pragma: no cover + """ + Describe deprecated parameter. 
+ + Parameters + ---------- + parameter : type[Parameter] + Deprecated parameter. + new_parameter : type[Parameter], optional + If there's a replacement parameter for the deprecated one, specify it here. + when_removed : str, optional + If known, the exact release when the deprecated parameter is planned to be removed. + """ + + _parameter: type["Parameter"] + _new_parameter: Optional[type["Parameter"]] + _when_removed: str + + def __init__( # noqa: FIR100 + self, + parameter: type["Parameter"], + new_parameter: Optional[type["Parameter"]] = None, + when_removed: Optional[str] = None, + ): + self._parameter = parameter + self._new_parameter = new_parameter + self._when_removed = "a future" if when_removed is None else when_removed + + def deprecation_message(self, use_envvar_names: bool = False) -> str: + """ + Generate a message to be used in a warning raised when using the deprecated parameter. + + Parameters + ---------- + use_envvar_names : bool, default: False + Whether to use environment variable names in the warning. If ``True``, both + ``self._parameter`` and ``self._new_parameter`` have to be a type of ``EnvironmentVariable``. + + Returns + ------- + str + """ + name = ( + cast("EnvironmentVariable", self._parameter).varname + if use_envvar_names + else self._parameter.__name__ + ) + msg = f"'{name}' is deprecated and will be removed in {self._when_removed} version." + if self._new_parameter is not None: + new_name = ( + cast("EnvironmentVariable", self._new_parameter).varname + if use_envvar_names + else self._new_parameter.__name__ + ) + msg += f" Use '{new_name}' instead." + return msg + + +class TypeDescriptor(NamedTuple): # pragma: no cover + """ + Class for config data manipulating of exact type. + + Parameters + ---------- + decode : callable + Callable to decode config value from the raw data. + normalize : callable + Callable to bring different config value variations to + the single form. + verify : callable + Callable to check that config value satisfies given config + type requirements. + help : str + Class description string. 
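+
+ Notes
+ -----
+ A small illustration with a hypothetical input, based on the ``dict`` entry of ``_TYPE_PARAMS``
+ defined below::
+
+ _TYPE_PARAMS[dict].decode("KEY1=1,KEY2=VALUE2")
+ # -> {"KEY1": 1, "KEY2": "VALUE2"}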
+ """ + + decode: Callable[[str], object] + normalize: Callable[[object], object] + verify: Callable[[object], bool] + help: str + + +class ExactStr(str): # pragma: no cover + """Class to be used in type params where no transformations are needed.""" + + +_TYPE_PARAMS = { + str: TypeDescriptor( + decode=lambda value: value.strip().title(), + normalize=lambda value: str(value).strip().title(), + verify=lambda value: True, + help="a case-insensitive string", + ), + ExactStr: TypeDescriptor( + decode=lambda value: value, + normalize=lambda value: value, + verify=lambda value: True, + help="a string", + ), + bool: TypeDescriptor( + decode=lambda value: value.strip().lower() in {"true", "yes", "1"}, + normalize=bool, + verify=lambda value: isinstance(value, bool) + or ( + isinstance(value, str) + and value.strip().lower() in {"true", "yes", "1", "false", "no", "0"} + ), + help="a boolean flag (any of 'true', 'yes' or '1' in case insensitive manner is considered positive)", + ), + int: TypeDescriptor( + decode=lambda value: int(value.strip()), + normalize=int, # type: ignore + verify=lambda value: isinstance(value, int) + or (isinstance(value, str) and value.strip().isdigit()), + help="an integer value", + ), + dict: TypeDescriptor( + decode=lambda value: { + key: int(val) if val.isdigit() else val + for key_value in value.split(",") + for key, val in [[v.strip() for v in key_value.split("=", maxsplit=1)]] + }, + normalize=lambda value: ( + value + if isinstance(value, dict) + else { + key: int(val) if val.isdigit() else val + for key_value in str(value).split(",") + for key, val in [[v.strip() for v in key_value.split("=", maxsplit=1)]] + } + ), + verify=lambda value: isinstance(value, dict) + or ( + isinstance(value, str) + and all( + key_value.find("=") not in (-1, len(key_value) - 1) + for key_value in value.split(",") + ) + ), + help="a sequence of KEY=VALUE values separated by comma (Example: 'KEY1=VALUE1,KEY2=VALUE2,KEY3=VALUE3')", + ), +} + +# special marker to distinguish unset value from None value +# as someone may want to use None as a real value for a parameter +_UNSET = object() + + +class ValueSource(IntEnum): # noqa: PR01 # pragma: no cover + """Class that describes the method of getting the value for a parameter.""" + + # got from default, i.e. neither user nor configuration source had the value + DEFAULT = 0 + # set by user + SET_BY_USER = 1 + # got from parameter configuration source, like environment variable + GOT_FROM_CFG_SOURCE = 2 + + +class Parameter: # pragma: no cover + """ + Base class describing interface for configuration entities. + + Attributes + ---------- + choices : Optional[Sequence[str]] + Array with possible options of ``Parameter`` values. + type : str + String that denotes ``Parameter`` type. + default : Optional[Any] + ``Parameter`` default value. + is_abstract : bool, default: True + Whether or not ``Parameter`` is abstract. + _value_source : Optional[ValueSource] + Source of the ``Parameter`` value, should be set by + ``ValueSource``. + _deprecation_descriptor : Optional[DeprecationDescriptor] + Indicate whether this parameter is deprecated. 
+ """ + + choices: Optional[tuple[str, ...]] = None + type = str + default: Optional[Any] = None + is_abstract = True + _value_source: Optional[ValueSource] = None + _value: Any = _UNSET + _subs: list = [] + _once: DefaultDict[Any, list] = defaultdict(list) + _deprecation_descriptor: Optional[DeprecationDescriptor] = None + + @classmethod + def _get_raw_from_config(cls) -> str: + """ + Read the value from config storage. + + Returns + ------- + str + Config raw value. + + Raises + ------ + KeyError + If value is absent. + + Notes + ----- + Config storage can be config file or environment variable or whatever. + Method should be implemented in the child class. + """ + raise NotImplementedError() + + @classmethod + def get_help(cls) -> str: + """ + Generate user-presentable help for the option. + + Returns + ------- + str + + Notes + ----- + Method should be implemented in the child class. + """ + raise NotImplementedError() + + def __init_subclass__(cls, type: Any, abstract: bool = False, **kw: dict): + """ + Initialize subclass. + + Parameters + ---------- + type : Any + Type of the config. + abstract : bool, default: False + Whether config is abstract. + **kw : dict + Optional arguments for config initialization. + """ + assert type in _TYPE_PARAMS, f"Unsupported variable type: {type}" + cls.type = type + cls.is_abstract = abstract + cls._value = _UNSET + cls._subs = [] + cls._once = defaultdict(list) + super().__init_subclass__(**kw) + + @classmethod + def subscribe(cls, callback: Callable) -> None: + """ + Add `callback` to the `_subs` list and then execute it. + + Parameters + ---------- + callback : callable + Callable to execute. + """ + cls._subs.append(callback) + callback(cls) + + @classmethod + def _get_default(cls) -> Any: + """ + Get default value of the config. + + Returns + ------- + Any + """ + return cls.default + + @classmethod + def get_value_source(cls) -> ValueSource: + """ + Get value source of the config. + + Returns + ------- + ValueSource + """ + if cls._value_source is None: + # dummy call to .get() to initialize the value + cls.get() + assert ( + cls._value_source is not None + ), "_value_source must be initialized by now in get()" + return cls._value_source + + @classmethod + def get(cls) -> Any: + """ + Get config value. + + Returns + ------- + Any + Decoded and verified config value. + """ + if cls._deprecation_descriptor is not None: + warnings.warn( # noqa: B028 + cls._deprecation_descriptor.deprecation_message(), FutureWarning + ) + if cls._value is _UNSET: + # get the value from env + try: + raw = cls._get_raw_from_config() + except KeyError: + cls._value = cls._get_default() + cls._value_source = ValueSource.DEFAULT + else: + if not _TYPE_PARAMS[cls.type].verify(raw): + raise ValueError(f"Unsupported raw value: {raw}") + cls._value = _TYPE_PARAMS[cls.type].decode(raw) + cls._value_source = ValueSource.GOT_FROM_CFG_SOURCE + return cls._value + + @classmethod + def put(cls, value: Any) -> None: + """ + Set config value. + + Parameters + ---------- + value : Any + Config value to set. + """ + if cls._deprecation_descriptor is not None: + warnings.warn( # noqa: B028 + cls._deprecation_descriptor.deprecation_message(), FutureWarning + ) + cls._check_callbacks(cls._put_nocallback(value)) + cls._value_source = ValueSource.SET_BY_USER + + @classmethod + def once(cls, onvalue: Any, callback: Callable) -> None: + """ + Execute `callback` if config value matches `onvalue` value. 
+ + Otherwise accumulate callbacks associated with the given `onvalue` + in the `_once` container. + + Parameters + ---------- + onvalue : Any + Config value to set. + callback : callable + Callable that should be executed if config value matches `onvalue`. + """ + onvalue = _TYPE_PARAMS[cls.type].normalize(onvalue) + if onvalue == cls.get(): + callback(cls) + else: + cls._once[onvalue].append(callback) + + @classmethod + def _put_nocallback(cls, value: Any) -> Any: + """ + Set config value without executing callbacks. + + Parameters + ---------- + value : Any + Config value to set. + + Returns + ------- + Any + Replaced (old) config value. + """ + if not _TYPE_PARAMS[cls.type].verify(value): + raise ValueError(f"Unsupported value: {value}") + value = _TYPE_PARAMS[cls.type].normalize(value) + oldvalue, cls._value = cls.get(), value + return oldvalue + + @classmethod + def _check_callbacks(cls, oldvalue: Any) -> None: + """ + Execute all needed callbacks if config value was changed. + + Parameters + ---------- + oldvalue : Any + Previous (old) config value. + """ + if oldvalue == cls.get(): + return + for callback in cls._subs: + callback(cls) + for callback in cls._once.pop(cls.get(), ()): + callback(cls) + + @classmethod + def add_option(cls, choice: Any) -> Any: + """ + Add a new choice for the parameter. + + Parameters + ---------- + choice : Any + New choice to add to the available choices. + + Returns + ------- + Any + Added choice normalized according to the parameter type. + """ + if cls.choices is not None: + if not _TYPE_PARAMS[cls.type].verify(choice): + raise ValueError(f"Unsupported choice value: {choice}") + choice = _TYPE_PARAMS[cls.type].normalize(choice) + if choice not in cls.choices: + cls.choices += (choice,) + return choice + raise TypeError("Cannot add a choice to a parameter where choices is None") + + +__all__ = ["Parameter"] diff --git a/src/snowflake/snowpark/modin/conftest.py b/src/snowflake/snowpark/modin/conftest.py new file mode 100644 index 00000000000..6ddcd4e8348 --- /dev/null +++ b/src/snowflake/snowpark/modin/conftest.py @@ -0,0 +1,19 @@ +#!/usr/bin/env python3 +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +import modin.pandas as pd # pragma: no cover +import numpy as np # pragma: no cover +import pytest # pragma: no cover + +import snowflake.snowpark.modin.plugin # pragma: no cover # noqa: F401 + + +@pytest.fixture(autouse=True, scope="module") # pragma: no cover +def add_doctest_imports(doctest_namespace) -> None: # pragma: no cover + """ + Make `np` and `pd` names available for doctests. + """ + doctest_namespace["np"] = np # pragma: no cover + doctest_namespace["pd"] = pd # pragma: no cover diff --git a/src/snowflake/snowpark/modin/core/__init__.py b/src/snowflake/snowpark/modin/core/__init__.py new file mode 100644 index 00000000000..0fbef920926 --- /dev/null +++ b/src/snowflake/snowpark/modin/core/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# diff --git a/src/snowflake/snowpark/modin/core/dataframe/__init__.py b/src/snowflake/snowpark/modin/core/dataframe/__init__.py new file mode 100644 index 00000000000..0fbef920926 --- /dev/null +++ b/src/snowflake/snowpark/modin/core/dataframe/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. 
+# diff --git a/src/snowflake/snowpark/modin/core/dataframe/algebra/__init__.py b/src/snowflake/snowpark/modin/core/dataframe/algebra/__init__.py new file mode 100644 index 00000000000..0fbef920926 --- /dev/null +++ b/src/snowflake/snowpark/modin/core/dataframe/algebra/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# diff --git a/src/snowflake/snowpark/modin/core/dataframe/algebra/default2pandas/__init__.py b/src/snowflake/snowpark/modin/core/dataframe/algebra/default2pandas/__init__.py new file mode 100644 index 00000000000..90a7794a316 --- /dev/null +++ b/src/snowflake/snowpark/modin/core/dataframe/algebra/default2pandas/__init__.py @@ -0,0 +1,66 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +# Code in this file may constitute partial or total reimplementation, or modification of +# existing code originally distributed by the Modin project, under the Apache License, +# Version 2.0. + +"""Module default2pandas provides templates for a query compiler default-to-pandas methods.""" + +from snowflake.snowpark.modin.core.dataframe.algebra.default2pandas.binary import ( + BinaryDefault, +) +from snowflake.snowpark.modin.core.dataframe.algebra.default2pandas.cat import ( + CatDefault, +) +from snowflake.snowpark.modin.core.dataframe.algebra.default2pandas.dataframe import ( + DataFrameDefault, +) +from snowflake.snowpark.modin.core.dataframe.algebra.default2pandas.datetime import ( + DateTimeDefault, +) +from snowflake.snowpark.modin.core.dataframe.algebra.default2pandas.default import ( + DefaultMethod, +) +from snowflake.snowpark.modin.core.dataframe.algebra.default2pandas.groupby import ( + GroupByDefault, +) +from snowflake.snowpark.modin.core.dataframe.algebra.default2pandas.resample import ( + ResampleDefault, +) +from snowflake.snowpark.modin.core.dataframe.algebra.default2pandas.rolling import ( + RollingDefault, +) +from snowflake.snowpark.modin.core.dataframe.algebra.default2pandas.series import ( + SeriesDefault, +) +from snowflake.snowpark.modin.core.dataframe.algebra.default2pandas.str import ( + StrDefault, +) + +__all__ = [ + "DataFrameDefault", + "DateTimeDefault", + "SeriesDefault", + "StrDefault", + "BinaryDefault", + "ResampleDefault", + "RollingDefault", + "DefaultMethod", + "CatDefault", + "GroupByDefault", +] diff --git a/src/snowflake/snowpark/modin/core/dataframe/algebra/default2pandas/binary.py b/src/snowflake/snowpark/modin/core/dataframe/algebra/default2pandas/binary.py new file mode 100644 index 00000000000..4052c113b25 --- /dev/null +++ b/src/snowflake/snowpark/modin/core/dataframe/algebra/default2pandas/binary.py @@ -0,0 +1,78 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. 
All rights reserved. +# + +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +# Code in this file may constitute partial or total reimplementation, or modification of +# existing code originally distributed by the Modin project, under the Apache License, +# Version 2.0. + +"""Module houses default binary functions builder class.""" +from typing import Any, Callable, Union + +import pandas +from pandas._typing import AnyArrayLike, Scalar + +from snowflake.snowpark.modin.core.dataframe.algebra.default2pandas.default import ( + DefaultMethod, +) + + +class BinaryDefault(DefaultMethod): + """Build default-to-pandas methods which executes binary functions.""" + + @classmethod + def build_default_to_pandas(cls, fn: Callable, fn_name: str) -> Callable: + """ + Build function that do fallback to pandas for passed binary `fn`. + + Parameters + ---------- + fn : callable + Binary function to apply to the casted to pandas frame and other operand. + fn_name : str + Function name which will be shown in default-to-pandas warning message. + + Returns + ------- + callable + Function that takes query compiler, does fallback to pandas and applies binary `fn` + to the casted to pandas frame. + """ + + def bin_ops_wrapper( + df: pandas.DataFrame, + other: Union[pandas.DataFrame, pandas.Series, Scalar, AnyArrayLike], + *args: Any, + **kwargs: Any + ) -> pandas.DataFrame: + """Apply specified binary function to the passed operands.""" + squeeze_other = kwargs.pop("broadcast", False) or kwargs.pop( + "squeeze_other", False + ) + squeeze_self = kwargs.pop("squeeze_self", False) + + if squeeze_other: + other = other.squeeze(axis=1) + + if squeeze_self: + df = df.squeeze(axis=1) + + result = fn(df, other, *args, **kwargs) + if not isinstance(result, pandas.DataFrame): # pragma: no cover + result = pandas.DataFrame(result) + return result + + return super().build_default_to_pandas(bin_ops_wrapper, fn_name) diff --git a/src/snowflake/snowpark/modin/core/dataframe/algebra/default2pandas/cat.py b/src/snowflake/snowpark/modin/core/dataframe/algebra/default2pandas/cat.py new file mode 100644 index 00000000000..6a239a7a55c --- /dev/null +++ b/src/snowflake/snowpark/modin/core/dataframe/algebra/default2pandas/cat.py @@ -0,0 +1,48 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +# Code in this file may constitute partial or total reimplementation, or modification of +# existing code originally distributed by the Modin project, under the Apache License, +# Version 2.0. + +"""Module houses default applied-on-category functions builder class.""" +import pandas + +from snowflake.snowpark.modin.core.dataframe.algebra.default2pandas.series import ( + SeriesDefault, +) + + +class CatDefault(SeriesDefault): + """Builder for default-to-pandas methods which is executed under category accessor.""" + + @classmethod + def frame_wrapper( + cls, df: pandas.DataFrame + ) -> pandas.core.arrays.categorical.CategoricalAccessor: + """ + Get category accessor of the passed frame. + + Parameters + ---------- + df : pandas.DataFrame + + Returns + ------- + pandas.core.arrays.categorical.CategoricalAccessor + """ + return df.squeeze(axis=1).cat diff --git a/src/snowflake/snowpark/modin/core/dataframe/algebra/default2pandas/dataframe.py b/src/snowflake/snowpark/modin/core/dataframe/algebra/default2pandas/dataframe.py new file mode 100644 index 00000000000..f22f80f130b --- /dev/null +++ b/src/snowflake/snowpark/modin/core/dataframe/algebra/default2pandas/dataframe.py @@ -0,0 +1,36 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +# Code in this file may constitute partial or total reimplementation, or modification of +# existing code originally distributed by the Modin project, under the Apache License, +# Version 2.0. + +"""Module houses default DataFrame functions builder class.""" + +import pandas + +from snowflake.snowpark.modin.core.dataframe.algebra.default2pandas.default import ( + DefaultMethod, +) + +# from modin.utils import _inherit_docstrings +from snowflake.snowpark.modin.utils import _inherit_docstrings + + +@_inherit_docstrings(DefaultMethod) +class DataFrameDefault(DefaultMethod): + DEFAULT_OBJECT_TYPE = pandas.DataFrame diff --git a/src/snowflake/snowpark/modin/core/dataframe/algebra/default2pandas/datetime.py b/src/snowflake/snowpark/modin/core/dataframe/algebra/default2pandas/datetime.py new file mode 100644 index 00000000000..d7aefcd165e --- /dev/null +++ b/src/snowflake/snowpark/modin/core/dataframe/algebra/default2pandas/datetime.py @@ -0,0 +1,48 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. 
+# + +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +# Code in this file may constitute partial or total reimplementation, or modification of +# existing code originally distributed by the Modin project, under the Apache License, +# Version 2.0. + +"""Module houses default applied-on-datetime functions builder class.""" +import pandas + +from snowflake.snowpark.modin.core.dataframe.algebra.default2pandas.series import ( + SeriesDefault, +) + + +class DateTimeDefault(SeriesDefault): + """Builder for default-to-pandas methods which is executed under datetime accessor.""" + + @classmethod + def frame_wrapper( + cls, df: pandas.DataFrame + ) -> pandas.core.indexes.accessors.DatetimeProperties: + """ + Get datetime accessor of the passed frame. + + Parameters + ---------- + df : pandas.DataFrame + + Returns + ------- + pandas.core.indexes.accessors.DatetimeProperties + """ + return df.squeeze(axis=1).dt diff --git a/src/snowflake/snowpark/modin/core/dataframe/algebra/default2pandas/default.py b/src/snowflake/snowpark/modin/core/dataframe/algebra/default2pandas/default.py new file mode 100644 index 00000000000..b6d20f08a6c --- /dev/null +++ b/src/snowflake/snowpark/modin/core/dataframe/algebra/default2pandas/default.py @@ -0,0 +1,279 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +# Code in this file may constitute partial or total reimplementation, or modification of +# existing code originally distributed by the Modin project, under the Apache License, +# Version 2.0. + +"""Module houses default functions builder class.""" + +from typing import Any, Callable, Optional, Union + +import pandas +from pandas.core.dtypes.common import is_list_like + +from snowflake.snowpark.modin.utils import ( + MODIN_UNNAMED_SERIES_LABEL, + is_property, + try_cast_to_pandas, +) + + +class ObjTypeDeterminer: + """ + Class that routes work to the frame. 
+ + Provides an instance which forwards all of the `__getattribute__` calls + to an object under which `key` function is applied. + """ + + def __getattr__(self, key: str) -> Callable: + """ + Build function that executes `key` function over passed frame. + + Parameters + ---------- + key : str + + Returns + ------- + callable + Function that takes DataFrame and executes `key` function on it. + """ + + def func(df: object, *args: Any, **kwargs: Any) -> Any: # pragma: no cover + """Access specified attribute of the passed object and call it if it's callable.""" + prop = getattr(df, key) + if callable(prop): + return prop(*args, **kwargs) + else: + return prop + + return func + + +class DefaultMethod: + """ + Builder for default-to-pandas methods. + + Attributes + ---------- + OBJECT_TYPE : str + Object type name that will be shown in default-to-pandas warning message. + DEFAULT_OBJECT_TYPE : object + Default place to search for a function. + """ + + OBJECT_TYPE = "DataFrame" + DEFAULT_OBJECT_TYPE = ObjTypeDeterminer + + # This function is pulled from the Operator class in modin/core/dataframe/algebra/operator.py + def __init__(self) -> None: + raise ValueError( # pragma: no cover + "Please use {}.register instead of the constructor".format( + type(self).__name__ + ) + ) + + @classmethod + def get_func_name_for_registered_method( + cls, fn: Union[Callable, property, str] + ) -> str: + """ + Function that takes in a Callable or a property and returns its name + """ + + if is_property(fn): + # when a property method without a name, fn_name will be something like + # "", here we use fget to get the name of the property. Note that this + # method is still not perfect because we cannot get the class name of the property, e.g., we can only get + # "hour" from series.dt.hour + fn_name = f"" # type: ignore[union-attr] + else: + fn_name = getattr(fn, "__name__", str(fn)) + + return fn_name + + @classmethod + def register( + cls, + func: Union[Callable, property, str], + obj_type: Optional[object] = None, + inplace: Optional[bool] = None, + fn_name: Optional[str] = None, + ) -> Callable: + """ + Build function that do fallback to default pandas implementation for passed `func`. + + Parameters + ---------- + func : callable or str, + Function to apply to the casted to pandas frame or its property accesed + by ``cls.frame_wrapper``. + obj_type : object, optional + If `func` is a string with a function name then `obj_type` provides an + object to search function in. + inplace : bool, optional + If True return an object to which `func` was applied, otherwise return + the result of `func`. + fn_name : str, optional + Function name which will be shown in default-to-pandas warning message. + If not specified, name will be deducted from `func`. + + Returns + ------- + callable + Function that takes query compiler, does fallback to pandas and applies `func` + to the casted to pandas frame or its property accessed by ``cls.frame_wrapper``. + """ + + if isinstance(func, str): + if obj_type is None: + obj_type = cls.DEFAULT_OBJECT_TYPE + fn = getattr(obj_type, func) + else: + fn = func + + if fn_name is None: + fn_name = cls.get_func_name_for_registered_method(func) + + if type(fn) == property: + fn = cls.build_property_wrapper(fn) + + def applyier(df: pandas.DataFrame, *args: Any, **kwargs: Any) -> Callable: + """ + Apply target function to the casted to pandas frame. 
+ + This function is directly applied to the casted to pandas frame, executes target + function under it and processes result so it is possible to create a valid + query compiler from it. + """ + args = try_cast_to_pandas(args) # pragma: no cover + kwargs = try_cast_to_pandas(kwargs) # pragma: no cover + + # pandas default implementation doesn't know how to handle `dtypes` keyword argument + kwargs.pop("dtypes", None) + df = cls.frame_wrapper(df) + result = fn(df, *args, **kwargs) + + if not isinstance( + result, pandas.Series + ) and not isinstance( # pragma: no cover + result, pandas.DataFrame + ): + # When applying a DatetimeProperties or TimedeltaProperties function, + # if we don't specify the dtype for the DataFrame, the frame might + # get the wrong dtype, e.g. for to_pydatetime in + # https://github.com/modin-project/modin/issues/4436 + astype_kwargs = {} + dtype = getattr(result, "dtype", None) + if dtype and isinstance( + df, + ( + pandas.core.indexes.accessors.DatetimeProperties, + pandas.core.indexes.accessors.TimedeltaProperties, + ), + ): + astype_kwargs["dtype"] = dtype + result = ( + pandas.DataFrame(result, **astype_kwargs) + if is_list_like(result) + else pandas.DataFrame([result], **astype_kwargs) + ) + if isinstance(result, pandas.Series): + if result.name is None: + result.name = MODIN_UNNAMED_SERIES_LABEL + result = result.to_frame() + + inplace_method = kwargs.get("inplace", False) + if inplace is not None: + inplace_method = inplace + return result if not inplace_method else df + + return cls.build_default_to_pandas(applyier, fn_name) # type: ignore[arg-type] + + @classmethod + def build_property_wrapper(cls, prop: property) -> Callable: + """ + Build function that accesses specified property of the frame. + + Parameters + ---------- + prop : str + Property name to access. + + Returns + ------- + callable + Function that takes DataFrame and returns its value of `prop` property. + """ + + def property_wrapper(df: Any) -> Any: + """Get specified property of the passed object.""" + return prop.fget(df) # type: ignore[misc] # pragma: no cover + + return property_wrapper + + @classmethod + def build_default_to_pandas(cls, fn: Callable, fn_name: str) -> Callable: + """ + Build function that do fallback to pandas for passed `fn`. + + Parameters + ---------- + fn : callable + Function to apply to the defaulted frame. + fn_name : str + Function name which will be shown in default-to-pandas warning message. + + Returns + ------- + callable + Method that does fallback to pandas and applies `fn` to the pandas frame. + """ + fn.__name__ = f"" + + def wrapper( # type: ignore[no-untyped-def] + self, *args: Any, **kwargs: Any + ) -> Callable: + """Do fallback to pandas for the specified function.""" + return self.default_to_pandas(fn, *args, **kwargs) + + return wrapper + + @classmethod + def frame_wrapper(cls, df: pandas.DataFrame) -> pandas.DataFrame: + """ + Extract frame property to apply function on. + + This method is executed under casted to pandas frame right before applying + a function passed to `register`, which gives an ability to transform frame somehow + or access its properties, by overriding this method in a child class. + + Parameters + ---------- + df : pandas.DataFrame + + Returns + ------- + pandas.DataFrame + + Notes + ----- + Being a base implementation, this particular method does nothing with passed frame. 
+        """
+        return df
diff --git a/src/snowflake/snowpark/modin/core/dataframe/algebra/default2pandas/groupby.py b/src/snowflake/snowpark/modin/core/dataframe/algebra/default2pandas/groupby.py
new file mode 100644
index 00000000000..ad5d96dc6a0
--- /dev/null
+++ b/src/snowflake/snowpark/modin/core/dataframe/algebra/default2pandas/groupby.py
@@ -0,0 +1,728 @@
+#
+# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved.
+#
+
+# Licensed to Modin Development Team under one or more contributor license agreements.
+# See the NOTICE file distributed with this work for additional information regarding
+# copyright ownership. The Modin Development Team licenses this file to you under the
+# Apache License, Version 2.0 (the "License"); you may not use this file except in
+# compliance with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under
+# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific language
+# governing permissions and limitations under the License.
+
+# Code in this file may constitute partial or total reimplementation, or modification of
+# existing code originally distributed by the Modin project, under the Apache License,
+# Version 2.0.
+
+"""Module houses default GroupBy functions builder class."""
+from typing import Any, Callable, Optional, Union
+
+import pandas
+from pandas.core.dtypes.common import is_list_like
+
+# Defines a set of string names of functions that are executed in a transform-way in groupby
+from pandas.core.groupby.base import transformation_kernels
+
+from snowflake.snowpark.modin.core.dataframe.algebra.default2pandas.default import (
+    DefaultMethod,
+)
+from snowflake.snowpark.modin.utils import (
+    MODIN_UNNAMED_SERIES_LABEL,
+    hashable,
+    is_property,
+)
+
+
+# FIXME: there is no sense in keeping `GroupBy` and `GroupByDefault` logic in different
+# classes. They should be combined.
+class GroupBy:
+    """Builder for GroupBy aggregation functions."""
+
+    agg_aliases = [
+        "agg",
+        "dict_agg",
+        pandas.core.groupby.DataFrameGroupBy.agg,
+        pandas.core.groupby.DataFrameGroupBy.aggregate,
+    ]
+
+    @staticmethod
+    def is_transformation_kernel(agg_func: Any) -> bool:
+        """
+        Check whether a passed aggregation function is a transformation.
+
+        Transformation means that the result of the function will be broadcasted
+        to the frame's original shape.
+
+        Parameters
+        ----------
+        agg_func : Any
+
+        Returns
+        -------
+        bool
+        """
+        return (
+            hashable(agg_func) and agg_func in transformation_kernels
+        )  # pragma: no cover
+
+    @classmethod
+    def _call_groupby(
+        cls, df: Union[pandas.DataFrame, pandas.Series], *args: Any, **kwargs: Any
+    ) -> Union[pandas.core.groupby.DataFrameGroupBy, pandas.core.groupby.SeriesGroupBy]:
+        """Call .groupby() on passed `df`."""
+        return df.groupby(*args, **kwargs)  # pragma: no cover
+
+    @classmethod
+    def validate_by(cls, by: Any) -> Any:
+        """
+        Build valid `by` parameter for `pandas.DataFrame.groupby`.
+
+        Cast all DataFrames in `by` parameter to Series or list of Series in case
+        of multi-column frame.
+
+        Parameters
+        ----------
+        by : DataFrame, Series, index label or list of such
+            Object which indicates groups for GroupBy.
+
+        Returns
+        -------
+        Series, index label or list of such
+            By parameter with all DataFrames casted to Series.
+ """ + + def try_cast_series(df: Any) -> Any: # pragma: no cover + """Cast one-column frame to Series.""" + if isinstance(df, pandas.DataFrame): + df = df.squeeze(axis=1) + if not isinstance(df, pandas.Series): + return df + if df.name == MODIN_UNNAMED_SERIES_LABEL: + df.name = None + return df + + if isinstance(by, pandas.DataFrame): + by = [try_cast_series(column) for _, column in by.items()] + elif isinstance(by, pandas.Series): + by = [try_cast_series(by)] + elif isinstance(by, list): + by = [try_cast_series(o) for o in by] + return by + + @classmethod + def inplace_applyier_builder( + cls, key: Callable, func: Optional[Union[Callable, str]] = None + ) -> Callable: + """ + Bind actual aggregation function to the GroupBy aggregation method. + + Parameters + ---------- + key : callable + Function that takes GroupBy object and evaluates passed aggregation function. + func : callable or str, optional + Function that takes DataFrame and aggregate its data. Will be applied + to each group at the grouped frame. + + Returns + ------- + callable, + Function that executes aggregation under GroupBy object. + """ + inplace_args = [] if func is None else [func] + + def inplace_applyier( # pragma: no cover + grp: Union[ + pandas.core.groupby.DataFrameGroupBy, pandas.core.groupby.SeriesGroupBy + ], + *func_args: Any, + **func_kwargs: Any, + ) -> Callable: + return key(grp, *inplace_args, *func_args, **func_kwargs) # type: ignore[operator] # pragma: no cover + + return inplace_applyier + + @classmethod + def get_func(cls, key: Callable, **kwargs: Any) -> Callable: + """ + Extract aggregation function from groupby arguments. + + Parameters + ---------- + key : callable or str + Default aggregation function. If aggregation function is not specified + via groupby arguments, then `key` function is used. + **kwargs : dict + GroupBy arguments that may contain aggregation function. + + Returns + ------- + callable + Aggregation function. + + Notes + ----- + There are two ways of how groupby aggregation can be invoked: + 1. Explicitly with query compiler method: `qc.groupby_sum()`. + 2. By passing aggregation function as an argument: `qc.groupby_agg("sum")`. + Both are going to produce the same result, however in the first case actual aggregation + function can be extracted from the method name, while for the second only from the method arguments. + """ + if "agg_func" in kwargs: + return cls.inplace_applyier_builder(key, kwargs["agg_func"]) + elif "func_dict" in kwargs: + return cls.inplace_applyier_builder(key, kwargs["func_dict"]) + else: + return cls.inplace_applyier_builder(key) + + @classmethod + def build_aggregate_method(cls, key: Callable) -> Callable: + """ + Build function for `QueryCompiler.groupby_agg` that can be executed as default-to-pandas. + + Parameters + ---------- + key : callable or str + Default aggregation function. If aggregation function is not specified + via groupby arguments, then `key` function is used. + + Returns + ------- + callable + Function that executes groupby aggregation. 
+ """ + + def fn( + df: Union[pandas.DataFrame, pandas.Series], + by: Any, + axis: int, + groupby_kwargs: dict[str, Any], + agg_args: Any, + agg_kwargs: dict[str, Any], + **kwargs: Any, + ) -> Any: + """Group DataFrame and apply aggregation function to each group.""" + by = cls.validate_by(by) + + grp = cls._call_groupby( + df, by, axis=axis, **groupby_kwargs + ) # pragma: no cover + agg_func = cls.get_func(key, **kwargs) + result = agg_func(grp, *agg_args, **agg_kwargs) + + return result + + return fn + + @classmethod + def build_groupby_reduce_method(cls, agg_func: Any) -> Callable: + """ + Build function for `QueryCompiler.groupby_*` that can be executed as default-to-pandas. + + Parameters + ---------- + agg_func : callable or str + Default aggregation function. If aggregation function is not specified + via groupby arguments, then `agg_func` function is used. + + Returns + ------- + callable + Function that executes groupby aggregation. + """ + + def fn( + df: Union[pandas.DataFrame, pandas.Series], + by: Any, + axis: int, + groupby_kwargs: dict[str, Any], + agg_args: Any, + agg_kwargs: dict[str, Any], + drop: bool = False, + **kwargs: Any, + ) -> Any: + """Group DataFrame and apply aggregation function to each group.""" + if not isinstance(by, (pandas.Series, pandas.DataFrame)): + by = cls.validate_by(by) + grp = cls._call_groupby( + df, by, axis=axis, **groupby_kwargs + ) # pragma: no cover + grp_agg_func = cls.get_func(agg_func, **kwargs) + return grp_agg_func( + grp, + *agg_args, + **agg_kwargs, + ) + + if isinstance(by, pandas.DataFrame): + by = by.squeeze(axis=1) + if ( + drop + and isinstance(by, pandas.Series) + and by.name in df + and df[by.name].equals(by) + ): + by = [by.name] + if isinstance(by, pandas.DataFrame): + df = pandas.concat([df] + [by[[o for o in by if o not in df]]], axis=1) + by = list(by.columns) + + groupby_kwargs = groupby_kwargs.copy() + as_index = groupby_kwargs.pop("as_index", True) + groupby_kwargs["as_index"] = True + + grp = cls._call_groupby( + df, by, axis=axis, **groupby_kwargs + ) # pragma: no cover + func = cls.get_func(agg_func, **kwargs) + result = func(grp, *agg_args, **agg_kwargs) + method = kwargs.get("method") + + if isinstance(result, pandas.Series): + result = result.to_frame( # pragma: no cover + MODIN_UNNAMED_SERIES_LABEL if result.name is None else result.name + ) + + if not as_index: + if isinstance(by, pandas.Series): + # 1. If `drop` is True then 'by' Series represents a column from the + # source frame and so the 'by' is internal. + # 2. If method is 'size' then any 'by' is considered to be internal. 
+ # This is a hacky legacy from the ``groupby_size`` implementation: + # https://github.com/modin-project/modin/issues/3739 + internal_by = (by.name,) if drop or method == "size" else tuple() + else: + internal_by = by + + cls.handle_as_index_for_dataframe( + result, + internal_by, + by_cols_dtypes=( + df.index.dtypes.values + if isinstance(df.index, pandas.MultiIndex) + else (df.index.dtype,) + ), + by_length=len(by), + drop=drop, + method=method, + inplace=True, + ) + + if result.index.name == MODIN_UNNAMED_SERIES_LABEL: + result.index.name = None + + return result + + return fn + + @classmethod + def is_aggregate(cls, key: Union[Callable, str, property]) -> bool: + """Check whether `key` is an alias for pandas.GroupBy.aggregation method.""" + return key in cls.agg_aliases + + @classmethod + def build_property_method(cls, property: property) -> Callable: + """ + Build function for `SnowflakeQueryCompiler.` that can be executed as default-to-pandas + + Parameters + ---------- + property: property + property of groupby object. + + Returns + ------- + callable + Function that executes groupby aggregation and returns property. + """ + + def fn( + df: Union[pandas.DataFrame, pandas.Series], + by: Any, + axis: int, + groupby_kwargs: dict[str, Any], + ) -> Any: + """Group DataFrame and apply aggregation function to each group.""" + by = cls.validate_by(by) # pragma: no cover + + grp = cls._call_groupby( + df, by, axis=axis, **groupby_kwargs + ) # pragma: no cover + + return property.fget(grp) # type: ignore[misc] + + return fn + + @classmethod + def build_groupby(cls, func: Union[Callable, property]) -> Callable: + """ + Build function that groups DataFrame and applies aggregation function to the every group. + + Parameters + ---------- + func : callable or str or property + Default aggregation function. If aggregation function is not specified + via groupby arguments, then `func` function is used. + + Returns + ------- + callable + Function that takes pandas DataFrame and does GroupBy aggregation. + """ + + if is_property(func): + return cls.build_property_method(func) # type: ignore[arg-type] + if cls.is_aggregate(func): + return cls.build_aggregate_method(func) # type: ignore[arg-type] + return cls.build_groupby_reduce_method( + func + ) # pragma: no cover # type: ignore[arg-type] + + @classmethod + def handle_as_index_for_dataframe( + cls, + result: pandas.DataFrame, + internal_by_cols: Any, + by_cols_dtypes: Optional[Any] = None, + by_length: Optional[int] = None, + selection: Optional[Any] = None, + partition_idx: int = 0, + drop: bool = True, + method: Optional[str] = None, + inplace: bool = False, + ) -> pandas.DataFrame: + """ + Handle `as_index=False` parameter for the passed GroupBy aggregation result. + + Parameters + ---------- + result : DataFrame + Frame containing GroupBy aggregation result computed with `as_index=True` + parameter (group names are located at the frame's index). + internal_by_cols : list-like + Internal 'by' columns. + by_cols_dtypes : list-like, optional + Data types of the internal 'by' columns. Required to do special casing + in case of categorical 'by'. If not specified, assume that there is no + categorical data in 'by'. + by_length : int, optional + Amount of keys to group on (including frame columns and external objects like list, Series, etc.) + If not specified, consider `by_length` to be equal ``len(internal_by_cols)``. 
+ selection : label or list of labels, optional + Set of columns that were explicitly selected for aggregation (for example + via dict-aggregation). If not specified assuming that aggregation was + applied to all of the available columns. + partition_idx : int, default: 0 + Positional index of the current partition. + drop : bool, default: True + Indicates whether or not any of the `by` data came from the same frame. + method : str, optional + Name of the groupby function. This is a hint to be able to do special casing. + Note: this parameter is a legacy from the ``groupby_size`` implementation, + it's a hacky one and probably will be removed in the future: https://github.com/modin-project/modin/issues/3739. + inplace : bool, default: False + Modify the DataFrame in place (do not create a new object). + + Returns + ------- + DataFrame + GroupBy aggregation result with the considered `as_index=False` parameter. + """ + if not inplace: + result = result.copy() + + ( + reset_index, + drop, + lvls_to_drop, + cols_to_drop, + ) = cls.handle_as_index( # pragma: no cover + result_cols=result.columns, + result_index_names=result.index.names, + internal_by_cols=internal_by_cols, + by_cols_dtypes=by_cols_dtypes, + by_length=by_length, + selection=selection, + partition_idx=partition_idx, + drop=drop, + method=method, + ) + + if len(lvls_to_drop) > 0: + result.index = result.index.droplevel(lvls_to_drop) + if len(cols_to_drop) > 0: + result.drop(columns=cols_to_drop, inplace=True) + if reset_index: + result.reset_index(drop=drop, inplace=True) + return result + + @staticmethod + def handle_as_index( + result_cols: pandas.Index, + result_index_names: Any, + internal_by_cols: Any, + by_cols_dtypes: Optional[Any] = None, + by_length: Optional[int] = None, + selection: Optional[Any] = None, + partition_idx: int = 0, + drop: bool = True, + method: Optional[str] = None, + ) -> tuple[bool, bool, list[int], Any]: + """ + Compute hints to process ``as_index=False`` parameter for the GroupBy result. + + This function resolves naming conflicts of the index levels to insert and the column labels + for the GroupBy result. The logic of this function assumes that the initial GroupBy result + was computed as ``as_index=True``. + + Parameters + ---------- + result_cols : pandas.Index + Columns of the GroupBy result. + result_index_names : list-like + Index names of the GroupBy result. + internal_by_cols : list-like + Internal 'by' columns. + by_cols_dtypes : list-like, optional + Data types of the internal 'by' columns. Required to do special casing + in case of categorical 'by'. If not specified, assume that there is no + categorical data in 'by'. + by_length : int, optional + Amount of keys to group on (including frame columns and external objects like list, Series, etc.) + If not specified, consider `by_length` to be equal ``len(internal_by_cols)``. + selection : label or list of labels, optional + Set of columns that were explicitly selected for aggregation (for example + via dict-aggregation). If not specified assuming that aggregation was + applied to all of the available columns. + partition_idx : int, default: 0 + Positional index of the current partition. + drop : bool, default: True + Indicates whether or not any of the `by` data came from the same frame. + method : str, optional + Name of the groupby function. This is a hint to be able to do special casing. 
+ Note: this parameter is a legacy from the ``groupby_size`` implementation, + it's a hacky one and probably will be removed in the future: https://github.com/modin-project/modin/issues/3739. + + Returns + ------- + reset_index : bool + Indicates whether to reset index to the default one (0, 1, 2 ... n) at this partition. + drop_index : bool + If `reset_index` is True, indicates whether to drop all index levels (True) or insert them into the + resulting columns (False). + lvls_to_drop : list of ints + Contains numeric indices of the levels of the result index to drop as intersected. + cols_to_drop : list of labels + Contains labels of the columns to drop from the result as intersected. + + Examples + -------- + >>> groupby_result = compute_groupby_without_processing_as_index_parameter() + >>> if not as_index: + >>> reset_index, drop, lvls_to_drop, cols_to_drop = handle_as_index(**extract_required_params(groupby_result)) + >>> if len(lvls_to_drop) > 0: + >>> groupby_result.index = groupby_result.index.droplevel(lvls_to_drop) + >>> if len(cols_to_drop) > 0: + >>> groupby_result = groupby_result.drop(columns=cols_to_drop) + >>> if reset_index: + >>> groupby_result_with_processed_as_index_parameter = groupby_result.reset_index(drop=drop) + >>> else: + >>> groupby_result_with_processed_as_index_parameter = groupby_result + """ + if by_length is None: + by_length = len(internal_by_cols) + + reset_index = method != "transform" and ( + by_length > 0 or selection is not None + ) # pragma: no cover + + # If the method is "size" then the result contains only one unique named column + # and we don't have to worry about any naming conflicts, so inserting all of + # the "by" into the result (just a fast-path) + if method == "size": + return reset_index, False, [], [] + + # pandas logic of resolving naming conflicts is the following: + # 1. If any categorical is in 'by' and 'by' is multi-column, then the categorical + # index is prioritized: drop intersected columns and insert all of the 'by' index + # levels to the frame as columns. + # 2. Otherwise, aggregation result is prioritized: drop intersected index levels and + # insert the filtered ones to the frame as columns. + if by_cols_dtypes is not None: + keep_index_levels = ( + by_length > 1 + and selection is None + and any(isinstance(x, pandas.CategoricalDtype) for x in by_cols_dtypes) + ) + else: + keep_index_levels = False + + # 1. We insert 'by'-columns to the result at the beginning of the frame and so only to the + # first partition, if partition_idx != 0 we just drop the index. If there are no columns + # that are required to drop (keep_index_levels is True) then we can exit here. + # 2. We don't insert 'by'-columns to the result if 'by'-data came from a different + # frame (drop is False), there's only one exception for this rule: if the `method` is "size", + # so if (drop is False) and method is not "size" we just drop the index and so can exit here. 
+ if (not keep_index_levels and partition_idx != 0) or ( + not drop and method != "size" + ): + return reset_index, True, [], [] + + if not isinstance(internal_by_cols, pandas.Index): + if not is_list_like(internal_by_cols): + internal_by_cols = [internal_by_cols] + internal_by_cols = pandas.Index(internal_by_cols) + + internal_by_cols = ( + internal_by_cols[ + ~internal_by_cols.str.startswith(MODIN_UNNAMED_SERIES_LABEL, na=False) + ] + if hasattr(internal_by_cols, "str") + else internal_by_cols + ) + + if selection is not None and not isinstance(selection, pandas.Index): + selection = pandas.Index(selection) + + lvls_to_drop: list[int] = [] # pragma: no cover + cols_to_drop: Any = [] # pragma: no cover + + if not keep_index_levels: + # We want to insert only these internal-by-cols that are not presented + # in the result in order to not create naming conflicts + if selection is None: + cols_to_insert = frozenset(internal_by_cols) - frozenset(result_cols) + else: + cols_to_insert = frozenset( + # We have to use explicit 'not in' check and not just difference + # of sets because of specific '__contains__' operator in case of + # scalar 'col' and MultiIndex 'selection'. + col + for col in internal_by_cols + if col not in selection + ) + else: + cols_to_insert = internal_by_cols + # We want to drop such internal-by-cols that are presented + # in the result in order to not create naming conflicts + cols_to_drop = frozenset(internal_by_cols) & frozenset(result_cols) + + if partition_idx == 0: + lvls_to_drop = [ + i + for i, name in enumerate(result_index_names) + if name not in cols_to_insert + ] + else: + lvls_to_drop = result_index_names + + drop = False + if len(lvls_to_drop) == len(result_index_names): + drop = True + lvls_to_drop = [] + + return reset_index, drop, lvls_to_drop, cols_to_drop + + +class SeriesGroupBy(GroupBy): + """Builder for GroupBy aggregation functions for Series.""" + + @classmethod + def _call_groupby(cls, df: pandas.DataFrame, *args: Any, **kwargs: Any) -> Callable: + """Call .groupby() on passed `df` squeezed to Series.""" + # We can end up here by two means - either by "true" call + # like Series().groupby() or by df.groupby()[item]. + + if len(df.columns) == 1: # pragma: no cover + # Series().groupby() case + return df.squeeze(axis=1).groupby(*args, **kwargs) # pragma: no cover + # In second case surrounding logic will supplement grouping columns, + # so we need to drop them after grouping is over; our originally + # selected column is always the first, so use it + return df.groupby(*args, **kwargs)[df.columns[0]] # pragma: no cover + + +class GroupByDefault(DefaultMethod): + """Builder for default-to-pandas GroupBy aggregation functions.""" + + _groupby_cls = GroupBy + + OBJECT_TYPE = "GroupBy" + + @classmethod + def register(cls, func: Callable, **kwargs: Any) -> Callable: + """ + Build default-to-pandas GroupBy aggregation function. + + Parameters + ---------- + func : callable or str + Default aggregation function. If aggregation function is not specified + via groupby arguments, then `func` function is used. + **kwargs : kwargs + Additional arguments that will be passed to function builder. + + Returns + ------- + callable + Functiom that takes query compiler and defaults to pandas to do GroupBy + aggregation. 
+ """ + return super().register( + cls._groupby_cls.build_groupby(func), + fn_name=cls.get_func_name_for_registered_method(func), + **kwargs, + ) + + # This specifies a `pandas.DataFrameGroupBy` method to pass the `agg_func` to, + # it's based on `how` to apply it. Going by pandas documentation: + # 1. `.aggregate(func)` applies func row/column wise. + # 2. `.apply(func)` applies func to a DataFrames, holding a whole group (group-wise). + # 3. `.transform(func)` is the same as `.apply()` but also broadcast the `func` + # result to the group's original shape. + # 4. 'direct' mode means that the passed `func` has to be applied directly + # to the `pandas.DataFrameGroupBy` object. + _aggregation_methods_dict = { + "axis_wise": pandas.core.groupby.DataFrameGroupBy.aggregate, + "group_wise": pandas.core.groupby.DataFrameGroupBy.apply, + "transform": pandas.core.groupby.DataFrameGroupBy.transform, + "direct": lambda grp, func, *args, **kwargs: func(grp, *args, **kwargs), + } + + @classmethod + def get_aggregation_method(cls, how: str) -> Callable: + """ + Return `pandas.DataFrameGroupBy` method that implements the passed `how` UDF applying strategy. + + Parameters + ---------- + how : {"axis_wise", "group_wise", "transform"} + `how` parameter of the ``BaseQueryCompiler.groupby_agg``. + + Returns + ------- + callable(pandas.DataFrameGroupBy, callable, *args, **kwargs) -> [pandas.DataFrame | pandas.Series] + + Notes + ----- + Visit ``BaseQueryCompiler.groupby_agg`` doc-string for more information about `how` parameter. + """ + return cls._aggregation_methods_dict[how] # pragma: no cover + + +class SeriesGroupByDefault(GroupByDefault): + """Builder for default-to-pandas GroupBy aggregation functions for Series.""" + + _groupby_cls = SeriesGroupBy + + _aggregation_methods_dict = { + "axis_wise": pandas.core.groupby.SeriesGroupBy.aggregate, + "group_wise": pandas.core.groupby.SeriesGroupBy.apply, + "transform": pandas.core.groupby.SeriesGroupBy.transform, + "direct": lambda grp, func, *args, **kwargs: func(grp, *args, **kwargs), + } diff --git a/src/snowflake/snowpark/modin/core/dataframe/algebra/default2pandas/resample.py b/src/snowflake/snowpark/modin/core/dataframe/algebra/default2pandas/resample.py new file mode 100644 index 00000000000..559c1a9cb3b --- /dev/null +++ b/src/snowflake/snowpark/modin/core/dataframe/algebra/default2pandas/resample.py @@ -0,0 +1,106 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +# Code in this file may constitute partial or total reimplementation, or modification of +# existing code originally distributed by the Modin project, under the Apache License, +# Version 2.0. 
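As a quick illustration of how the `_aggregation_methods_dict` dispatch above behaves, here is a
minimal pandas-only sketch (the `methods`, `df`, and `grp` names are invented for this example and
are not part of the patch): each entry is an unbound `DataFrameGroupBy` method, so it is called
with the groupby object as its first argument.

    import pandas

    # Same unbound-method dispatch style as GroupByDefault._aggregation_methods_dict above.
    methods = {
        "axis_wise": pandas.core.groupby.DataFrameGroupBy.aggregate,
        "group_wise": pandas.core.groupby.DataFrameGroupBy.apply,
        "transform": pandas.core.groupby.DataFrameGroupBy.transform,
        "direct": lambda grp, func, *args, **kwargs: func(grp, *args, **kwargs),
    }

    df = pandas.DataFrame({"k": ["a", "a", "b"], "v": [1, 2, 3]})
    grp = df.groupby("k")
    print(methods["axis_wise"](grp, "sum"))     # per-group aggregation, one row per group
    print(methods["transform"](grp, "cumsum"))  # result broadcast back to df's original shape

The "direct" entry simply hands the raw groupby object to the user's callable, which is what the
query compiler uses when a UDF needs the `DataFrameGroupBy` itself.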
+
+"""Module houses default Resample functions builder class."""
+from typing import Any, Callable, Union
+
+import pandas
+
+from snowflake.snowpark.modin.core.dataframe.algebra.default2pandas.default import (
+    DefaultMethod,
+)
+
+
+# FIXME: there is no sense in keeping `Resampler` and `ResampleDefault` logic in different
+# classes. They should be combined.
+class Resampler:
+    """Builder class for resampled aggregation functions."""
+
+    @classmethod
+    def build_resample(cls, func: Union[Callable, property], squeeze_self: bool) -> Any:
+        """
+        Build function that resamples time-series data and does aggregation.
+
+        Parameters
+        ----------
+        func : callable
+            Aggregation function to execute under resampled frame.
+        squeeze_self : bool
+            Whether or not to squeeze frame before resampling.
+
+        Returns
+        -------
+        callable
+            Function that takes pandas DataFrame and applies aggregation
+            to resampled time-series data.
+        """
+
+        def fn(  # pragma: no cover
+            df: pandas.DataFrame,
+            resample_kwargs: dict[str, Any],
+            *args: Any,
+            **kwargs: Any
+        ) -> Any:
+            """Resample time-series data of the passed frame and apply specified aggregation."""
+            if squeeze_self:
+                df = df.squeeze(axis=1)
+            resampler = df.resample(**resample_kwargs)
+
+            if type(func) == property:
+                return func.fget(resampler)  # type: ignore[misc] # pragma: no cover
+
+            return func(resampler, *args, **kwargs)  # type: ignore[operator] # pragma: no cover
+
+        return fn
+
+
+class ResampleDefault(DefaultMethod):
+    """Builder for default-to-pandas resampled aggregation functions."""
+
+    OBJECT_TYPE = "Resampler"
+
+    @classmethod
+    def register(
+        cls, func: Callable, squeeze_self: bool = False, **kwargs: Any
+    ) -> Callable:
+        """
+        Build function that does fallback to pandas and aggregates resampled data.
+
+        Parameters
+        ----------
+        func : callable
+            Aggregation function to execute under resampled frame.
+        squeeze_self : bool, default: False
+            Whether or not to squeeze frame before resampling.
+        **kwargs : kwargs
+            Additional arguments that will be passed to function builder.
+
+        Returns
+        -------
+        callable
+            Function that takes query compiler and does fallback to pandas to resample
+            time-series data and apply aggregation on it.
+        """
+        return super().register(
+            Resampler.build_resample(func, squeeze_self),
+            fn_name=func.__name__,
+            **kwargs
+        )
diff --git a/src/snowflake/snowpark/modin/core/dataframe/algebra/default2pandas/rolling.py b/src/snowflake/snowpark/modin/core/dataframe/algebra/default2pandas/rolling.py
new file mode 100644
index 00000000000..be662920c80
--- /dev/null
+++ b/src/snowflake/snowpark/modin/core/dataframe/algebra/default2pandas/rolling.py
@@ -0,0 +1,159 @@
+#
+# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved.
+#
+
+# Licensed to Modin Development Team under one or more contributor license agreements.
+# See the NOTICE file distributed with this work for additional information regarding
+# copyright ownership. The Modin Development Team licenses this file to you under the
+# Apache License, Version 2.0 (the "License"); you may not use this file except in
+# compliance with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under
+# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific language
+# governing permissions and limitations under the License.
+ +# Code in this file may constitute partial or total reimplementation, or modification of +# existing code originally distributed by the Modin project, under the Apache License, +# Version 2.0. + +"""Module houses default Rolling functions builder class.""" + +from typing import Any, Callable, Union + +import pandas + +from snowflake.snowpark.modin.core.dataframe.algebra.default2pandas.default import ( + DefaultMethod, +) + + +class RollingDefault(DefaultMethod): + """Builder for default-to-pandas aggregation on a rolling window functions.""" + + OBJECT_TYPE = "Rolling" + + @classmethod + def _build_rolling(cls, func: Union[Callable, property]) -> Callable: + """ + Build function that creates a rolling window and executes `func` on it. + + Parameters + ---------- + func : callable + Function to execute on a rolling window. + + Returns + ------- + callable + Function that takes pandas DataFrame and applies `func` on a rolling window. + """ + + def fn( # pragma: no cover + df: pandas.DataFrame, + rolling_kwargs: dict[str, Any], + *args: Any, + **kwargs: Any + ) -> Any: + """Create rolling window for the passed frame and execute specified `func` on it.""" + roller = df.rolling(**rolling_kwargs) # pragma: no cover + + if type(func) == property: # pragma: no cover + return func.fget(roller) # type: ignore[misc] # pragma: no cover + + return func(roller, *args, **kwargs) # type: ignore[operator] # pragma: no cover + + return fn # pragma: no cover + + @classmethod + def register(cls, func: Callable, **kwargs: Any) -> Callable: + """ + Build function that do fallback to pandas to apply `func` on a rolling window. + + Parameters + ---------- + func : callable + Function to execute on a rolling window. + **kwargs : kwargs + Additional arguments that will be passed to function builder. + + Returns + ------- + callable + Function that takes query compiler and defaults to pandas to apply aggregation + `func` on a rolling window. + """ + return super().register( # pragma: no cover + cls._build_rolling(func), fn_name=func.__name__, **kwargs + ) + + +class ExpandingDefault(DefaultMethod): + """Builder for default-to-pandas aggregation on an expanding window functions.""" + + OBJECT_TYPE = "Expanding" + + @classmethod + def _build_expanding( + cls, func: Union[Callable, property], squeeze_self: bool + ) -> Callable: + """ + Build function that creates an expanding window and executes `func` on it. + + Parameters + ---------- + func : callable + Function to execute on a expanding window. + squeeze_self : bool + Whether or not to squeeze frame before executing the window function. + + Returns + ------- + callable + Function that takes pandas DataFrame and applies `func` on a expanding window. + """ + + def fn( # pragma: no cover + df: pandas.DataFrame, rolling_args: Any, *args: Any, **kwargs: Any + ) -> Any: + """Create rolling window for the passed frame and execute specified `func` on it.""" + if squeeze_self: # pragma: no cover + df = df.squeeze(axis=1) # pragma: no cover + roller = df.expanding(*rolling_args) # pragma: no cover + + if type(func) == property: # pragma: no cover + return func.fget(roller) # type: ignore[misc] # pragma: no cover + + return func(roller, *args, **kwargs) # type: ignore[operator] # pragma: no cover + + return fn # pragma: no cover + + @classmethod + def register( + cls, func: Callable, squeeze_self: bool = False, **kwargs: Any + ) -> Callable: + """ + Build function that do fallback to pandas to apply `func` on a expanding window. 
+ + Parameters + ---------- + func : callable + Function to execute on an expanding window. + squeeze_self : bool, default: False + Whether or not to squeeze frame before executing the window function. + **kwargs : kwargs + Additional arguments that will be passed to function builder. + + Returns + ------- + callable + Function that takes query compiler and defaults to pandas to apply aggregation + `func` on an expanding window. + """ + return super().register( # pragma: no cover + cls._build_expanding(func, squeeze_self=squeeze_self), + fn_name=func.__name__, + **kwargs + ) diff --git a/src/snowflake/snowpark/modin/core/dataframe/algebra/default2pandas/series.py b/src/snowflake/snowpark/modin/core/dataframe/algebra/default2pandas/series.py new file mode 100644 index 00000000000..455360cef59 --- /dev/null +++ b/src/snowflake/snowpark/modin/core/dataframe/algebra/default2pandas/series.py @@ -0,0 +1,49 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +# Code in this file may constitute partial or total reimplementation, or modification of +# existing code originally distributed by the Modin project, under the Apache License, +# Version 2.0. + +"""Module houses default Series functions builder class.""" +import pandas + +from snowflake.snowpark.modin.core.dataframe.algebra.default2pandas.default import ( + DefaultMethod, +) + + +class SeriesDefault(DefaultMethod): + """Builder for default-to-pandas methods which is executed under Series.""" + + OBJECT_TYPE = "Series" + + @classmethod + def frame_wrapper(cls, df: pandas.DataFrame) -> pandas.Series: + """ + Squeeze passed DataFrame to be able to process Series-specific functions on it. + + Parameters + ---------- + df : pandas.DataFrame + One-column DataFrame to squeeze. + + Returns + ------- + pandas.Series + """ + return df.squeeze(axis=1) diff --git a/src/snowflake/snowpark/modin/core/dataframe/algebra/default2pandas/str.py b/src/snowflake/snowpark/modin/core/dataframe/algebra/default2pandas/str.py new file mode 100644 index 00000000000..4de39a3c084 --- /dev/null +++ b/src/snowflake/snowpark/modin/core/dataframe/algebra/default2pandas/str.py @@ -0,0 +1,48 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +# Code in this file may constitute partial or total reimplementation, or modification of +# existing code originally distributed by the Modin project, under the Apache License, +# Version 2.0. + +"""Module houses default applied-on-str functions builder class.""" +import pandas + +from snowflake.snowpark.modin.core.dataframe.algebra.default2pandas.series import ( + SeriesDefault, +) + + +class StrDefault(SeriesDefault): + """Builder for default-to-pandas methods which is executed under `str` accessor.""" + + @classmethod + def frame_wrapper( + cls, df: pandas.DataFrame + ) -> pandas.core.strings.accessor.StringMethods: + """ + Get `str` accessor of the passed frame. + + Parameters + ---------- + df : pandas.DataFrame + + Returns + ------- + pandas.core.strings.accessor.StringMethods + """ + return df.squeeze(axis=1).str diff --git a/src/snowflake/snowpark/modin/core/execution/__init__.py b/src/snowflake/snowpark/modin/core/execution/__init__.py new file mode 100644 index 00000000000..0fbef920926 --- /dev/null +++ b/src/snowflake/snowpark/modin/core/execution/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# diff --git a/src/snowflake/snowpark/modin/core/execution/dispatching/__init__.py b/src/snowflake/snowpark/modin/core/execution/dispatching/__init__.py new file mode 100644 index 00000000000..e71962dddec --- /dev/null +++ b/src/snowflake/snowpark/modin/core/execution/dispatching/__init__.py @@ -0,0 +1,22 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +# Code in this file may constitute partial or total reimplementation, or modification of +# existing code originally distributed by the Modin project, under the Apache License, +# Version 2.0. + +"""Modin's functionality related to dispatching to specific execution.""" diff --git a/src/snowflake/snowpark/modin/core/execution/dispatching/factories/__init__.py b/src/snowflake/snowpark/modin/core/execution/dispatching/factories/__init__.py new file mode 100644 index 00000000000..b1326d67f33 --- /dev/null +++ b/src/snowflake/snowpark/modin/core/execution/dispatching/factories/__init__.py @@ -0,0 +1,26 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +# Licensed to Modin Development Team under one or more contributor license agreements. 
+# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +# Code in this file may constitute partial or total reimplementation, or modification of +# existing code originally distributed by the Modin project, under the Apache License, +# Version 2.0. + +"""Factories responsible for dispatching to specific execution.""" + +from snowflake.snowpark.modin.core.execution.dispatching.factories import ( # noqa: F401 + factories, +) diff --git a/src/snowflake/snowpark/modin/core/execution/dispatching/factories/baseio.py b/src/snowflake/snowpark/modin/core/execution/dispatching/factories/baseio.py new file mode 100644 index 00000000000..6825cf30c23 --- /dev/null +++ b/src/snowflake/snowpark/modin/core/execution/dispatching/factories/baseio.py @@ -0,0 +1,680 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +# based on https://raw.githubusercontent.com/modin-project/modin/master/modin/core/io/io.py +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +# Code in this file may constitute partial or total reimplementation, or modification of +# existing code originally distributed by the Modin project, under the Apache License, +# Version 2.0. + +""" +Module houses `BaseIO` class. + +`BaseIO` is base class for IO classes, that stores IO functions. +""" + +from collections import OrderedDict +from typing import Any + +import pandas +from pandas.util._decorators import doc + +from snowflake.snowpark.modin.plugin.compiler import BaseQueryCompiler +from snowflake.snowpark.modin.utils import _inherit_docstrings + +_doc_default_io_method = """ +{summary} using pandas. +For parameters description please refer to pandas API. 
+ +Returns +------- +{returns} +""" + +_doc_returns_qc = """BaseQueryCompiler + QueryCompiler with read data.""" + +_doc_returns_qc_or_parser = """BaseQueryCompiler or TextParser + QueryCompiler or TextParser with read data.""" + + +class BaseIO: + """Class for basic utils and default implementation of IO functions.""" + + query_compiler_cls: BaseQueryCompiler = None + frame_cls = None + + @classmethod + def from_non_pandas(cls, *args, **kwargs): + """ + Create a Modin `query_compiler` from a non-pandas `object`. + + Parameters + ---------- + *args : iterable + Positional arguments to be passed into `func`. + **kwargs : dict + Keyword arguments to be passed into `func`. + """ + return None + + @classmethod + def from_pandas(cls, df): + """ + Create a Modin `query_compiler` from a `pandas.DataFrame`. + + Parameters + ---------- + df : pandas.DataFrame + The pandas DataFrame to convert from. + + Returns + ------- + BaseQueryCompiler + QueryCompiler containing data from the `pandas.DataFrame`. + """ + return cls.query_compiler_cls.from_pandas(df, cls.frame_cls) + + @classmethod + def from_arrow(cls, at): + """ + Create a Modin `query_compiler` from a `pyarrow.Table`. + + Parameters + ---------- + at : Arrow Table + The Arrow Table to convert from. + + Returns + ------- + BaseQueryCompiler + QueryCompiler containing data from the Arrow Table. + """ + return cls.query_compiler_cls.from_arrow(at, cls.frame_cls) + + @classmethod + def from_dataframe(cls, df): + """ + Create a Modin QueryCompiler from a DataFrame supporting the DataFrame exchange protocol `__dataframe__()`. + + Parameters + ---------- + df : DataFrame + The DataFrame object supporting the DataFrame exchange protocol. + + Returns + ------- + BaseQueryCompiler + QueryCompiler containing data from the DataFrame. 
+ """ + return cls.query_compiler_cls.from_dataframe(df, cls.frame_cls) + + @classmethod + @_inherit_docstrings(pandas.read_parquet, apilink="pandas.read_parquet") + @doc( + _doc_default_io_method, + summary="Load a parquet object from the file path, returning a query compiler", + returns=_doc_returns_qc, + ) + def read_parquet(cls, **kwargs): # noqa: PR01 + return cls.from_pandas( + pandas.read_parquet( + **kwargs, + ) + ) + + @classmethod + @_inherit_docstrings(pandas.read_csv, apilink="pandas.read_csv") + @doc( + _doc_default_io_method, + summary="Read a comma-separated values (CSV) file into query compiler", + returns=_doc_returns_qc_or_parser, + ) + def read_csv( + cls, + filepath_or_buffer, + **kwargs, + ): # noqa: PR01 + pd_obj = pandas.read_csv(filepath_or_buffer, **kwargs) + if isinstance(pd_obj, pandas.DataFrame): + return cls.from_pandas(pd_obj) + if isinstance(pd_obj, pandas.io.parsers.TextFileReader): + # Overwriting the read method should return a Modin DataFrame for calls + # to __next__ and get_chunk + pd_read = pd_obj.read + pd_obj.read = lambda *args, **kw: cls.from_pandas(pd_read(*args, **kw)) + return pd_obj + + @classmethod + @_inherit_docstrings(pandas.read_json, apilink="pandas.read_json") + @doc( + _doc_default_io_method, + summary="Convert a JSON string to query compiler", + returns=_doc_returns_qc, + ) + def read_json( + cls, + **kwargs, + ): # noqa: PR01 + return cls.from_pandas(pandas.read_json(**kwargs)) + + @classmethod + @_inherit_docstrings(pandas.read_gbq, apilink="pandas.read_gbq") + @doc( + _doc_default_io_method, + summary="Load data from Google BigQuery into query compiler", + returns=_doc_returns_qc, + ) + def read_gbq( + cls, + query: str, + project_id=None, + index_col=None, + col_order=None, + reauth=False, + auth_local_webserver=False, + dialect=None, + location=None, + configuration=None, + credentials=None, + use_bqstorage_api=None, + private_key=None, + verbose=None, + progress_bar_type=None, + max_results=None, + ): # noqa: PR01 + return cls.from_pandas( + pandas.read_gbq( + query, + project_id=project_id, + index_col=index_col, + col_order=col_order, + reauth=reauth, + auth_local_webserver=auth_local_webserver, + dialect=dialect, + location=location, + configuration=configuration, + credentials=credentials, + use_bqstorage_api=use_bqstorage_api, + progress_bar_type=progress_bar_type, + max_results=max_results, + ) + ) + + @classmethod + @_inherit_docstrings(pandas.read_html, apilink="pandas.read_html") + @doc( + _doc_default_io_method, + summary="Read HTML tables into query compiler", + returns=_doc_returns_qc, + ) + def read_html( + cls, + io, + match=".+", + flavor=None, + header=None, + index_col=None, + skiprows=None, + attrs=None, + parse_dates=False, + thousands=",", + encoding=None, + decimal=".", + converters=None, + na_values=None, + keep_default_na=True, + displayed_only=True, + **kwargs, + ): # noqa: PR01 + return cls.from_pandas( + pandas.read_html( + io=io, + match=match, + flavor=flavor, + header=header, + index_col=index_col, + skiprows=skiprows, + attrs=attrs, + parse_dates=parse_dates, + thousands=thousands, + encoding=encoding, + decimal=decimal, + converters=converters, + na_values=na_values, + keep_default_na=keep_default_na, + displayed_only=displayed_only, + **kwargs, + )[0] + ) + + @classmethod + @_inherit_docstrings(pandas.read_clipboard, apilink="pandas.read_clipboard") + @doc( + _doc_default_io_method, + summary="Read text from clipboard into query compiler", + returns=_doc_returns_qc, + ) + def read_clipboard(cls, 
sep=r"\s+", **kwargs): # pragma: no cover # noqa: PR01 + return cls.from_pandas(pandas.read_clipboard(sep=sep, **kwargs)) + + @classmethod + @_inherit_docstrings(pandas.read_excel, apilink="pandas.read_excel") + @doc( + _doc_default_io_method, + summary="Read an Excel file into query compiler", + returns="""BaseQueryCompiler or dict/OrderedDict : + QueryCompiler or OrderedDict/dict with read data.""", + ) + def read_excel( + cls, + io, + sheet_name=0, + header=0, + names=None, + index_col=None, + usecols=None, + squeeze=False, + dtype=None, + engine=None, + converters=None, + true_values=None, + false_values=None, + skiprows=None, + nrows=None, + na_values=None, + keep_default_na=True, + verbose=False, + parse_dates=False, + date_parser=None, + thousands=None, + comment=None, + skip_footer=0, + skipfooter=0, + convert_float=True, + mangle_dupe_cols=True, + na_filter=True, + **kwds, + ): # noqa: PR01 + if skip_footer != 0: + skipfooter = skip_footer + intermediate = pandas.read_excel( + io, + sheet_name=sheet_name, + header=header, + names=names, + index_col=index_col, + usecols=usecols, + squeeze=squeeze, + dtype=dtype, + engine=engine, + converters=converters, + true_values=true_values, + false_values=false_values, + skiprows=skiprows, + nrows=nrows, + na_values=na_values, + keep_default_na=keep_default_na, + verbose=verbose, + parse_dates=parse_dates, + date_parser=date_parser, + thousands=thousands, + comment=comment, + skipfooter=skipfooter, + convert_float=convert_float, + mangle_dupe_cols=mangle_dupe_cols, + na_filter=na_filter, + **kwds, + ) + if isinstance(intermediate, (OrderedDict, dict)): + parsed = type(intermediate)() + for key in intermediate.keys(): + parsed[key] = cls.from_pandas(intermediate.get(key)) + return parsed + else: + return cls.from_pandas(intermediate) + + @classmethod + @_inherit_docstrings(pandas.read_hdf, apilink="pandas.read_hdf") + @doc( + _doc_default_io_method, + summary="Read data from hdf store into query compiler", + returns=_doc_returns_qc, + ) + def read_hdf( + cls, + path_or_buf, + key=None, + mode: str = "r", + errors: str = "strict", + where=None, + start=None, + stop=None, + columns=None, + iterator=False, + chunksize=None, + **kwargs, + ): # noqa: PR01 + from modin.pandas.io import HDFStore + + modin_store = isinstance(path_or_buf, HDFStore) + if modin_store: + path_or_buf._return_modin_dataframe = False + df = pandas.read_hdf( + path_or_buf, + key=key, + mode=mode, + columns=columns, + errors=errors, + where=where, + start=start, + stop=stop, + iterator=iterator, + chunksize=chunksize, + **kwargs, + ) + if modin_store: + path_or_buf._return_modin_dataframe = True + + return cls.from_pandas(df) + + @classmethod + @_inherit_docstrings(pandas.read_feather, apilink="pandas.read_feather") + @doc( + _doc_default_io_method, + summary="Load a feather-format object from the file path into query compiler", + returns=_doc_returns_qc, + ) + def read_feather( + cls, + path, + **kwargs, + ): # noqa: PR01 + return cls.from_pandas( + pandas.read_feather( + path, + **kwargs, + ) + ) + + @classmethod + @_inherit_docstrings(pandas.read_stata, apilink="pandas.read_stata") + @doc( + _doc_default_io_method, + summary="Read Stata file into query compiler", + returns=_doc_returns_qc, + ) + def read_stata( + cls, + filepath_or_buffer, + **kwargs, + ): # noqa: PR01 + return cls.from_pandas(pandas.read_stata(filepath_or_buffer, **kwargs)) + + @classmethod + @_inherit_docstrings(pandas.read_sas, apilink="pandas.read_sas") + @doc( + _doc_default_io_method, + summary="Read 
SAS files stored as either XPORT or SAS7BDAT format files\ninto query compiler", + returns=_doc_returns_qc, + ) + def read_sas( + cls, + filepath_or_buffer, + format=None, + index=None, + encoding=None, + chunksize=None, + iterator=False, + **kwargs, + ): # pragma: no cover # noqa: PR01 + return cls.from_pandas( + pandas.read_sas( + filepath_or_buffer, + format=format, + index=index, + encoding=encoding, + chunksize=chunksize, + iterator=iterator, + **kwargs, + ) + ) + + @classmethod + @_inherit_docstrings(pandas.read_pickle, apilink="pandas.read_pickle") + @doc( + _doc_default_io_method, + summary="Load pickled pandas object (or any object) from file into query compiler", + returns=_doc_returns_qc, + ) + def read_pickle( + cls, + filepath_or_buffer, + **kwargs, + ): # noqa: PR01 + + return cls.from_pandas( + pandas.read_pickle( + filepath_or_buffer, + **kwargs, + ) + ) + + @classmethod + @_inherit_docstrings(pandas.read_sql, apilink="pandas.read_sql") + @doc( + _doc_default_io_method, + summary="Read SQL query or database table into query compiler", + returns=_doc_returns_qc, + ) + def read_sql( + cls, + sql, + con, + index_col=None, + coerce_float=True, + params=None, + parse_dates=None, + columns=None, + chunksize=None, + ): # noqa: PR01 + # if isinstance(con, ModinDatabaseConnection): + # con = con.get_connection() + return cls.from_pandas( + pandas.read_sql( + sql, + con, + index_col=index_col, + coerce_float=coerce_float, + params=params, + parse_dates=parse_dates, + columns=columns, + chunksize=chunksize, + ) + ) + + @classmethod + @_inherit_docstrings(pandas.read_fwf, apilink="pandas.read_fwf") + @doc( + _doc_default_io_method, + summary="Read a table of fixed-width formatted lines into query compiler", + returns=_doc_returns_qc_or_parser, + ) + def read_fwf( + cls, filepath_or_buffer, colspecs="infer", widths=None, infer_nrows=100, **kwds + ): # noqa: PR01 + pd_obj = pandas.read_fwf( + filepath_or_buffer, + colspecs=colspecs, + widths=widths, + infer_nrows=infer_nrows, + **kwds, + ) + if isinstance(pd_obj, pandas.DataFrame): + return cls.from_pandas(pd_obj) + if isinstance(pd_obj, pandas.io.parsers.TextFileReader): + # Overwriting the read method should return a Modin DataFrame for calls + # to __next__ and get_chunk + pd_read = pd_obj.read + pd_obj.read = lambda *args, **kwargs: cls.from_pandas( + pd_read(*args, **kwargs) + ) + return pd_obj + + @classmethod + @_inherit_docstrings(pandas.read_sql_table, apilink="pandas.read_sql_table") + @doc( + _doc_default_io_method, + summary="Read SQL database table into query compiler", + returns=_doc_returns_qc, + ) + def read_sql_table( + cls, + table_name, + con, + schema=None, + index_col=None, + coerce_float=True, + parse_dates=None, + columns=None, + chunksize=None, + ): # noqa: PR01 + return cls.from_pandas( + pandas.read_sql_table( + table_name, + con, + schema=schema, + index_col=index_col, + coerce_float=coerce_float, + parse_dates=parse_dates, + columns=columns, + chunksize=chunksize, + ) + ) + + @classmethod + @_inherit_docstrings(pandas.read_sql_query, apilink="pandas.read_sql_query") + @doc( + _doc_default_io_method, + summary="Read SQL query into query compiler", + returns=_doc_returns_qc, + ) + def read_sql_query( + cls, + sql, + con, + **kwargs, + ): # noqa: PR01 + return cls.from_pandas( + pandas.read_sql_query( + sql, + con, + **kwargs, + ) + ) + + @classmethod + @_inherit_docstrings(pandas.read_spss, apilink="pandas.read_spss") + @doc( + _doc_default_io_method, + summary="Load an SPSS file from the file path, returning a 
query compiler", + returns=_doc_returns_qc, + ) + def read_spss(cls, path, usecols, convert_categoricals): # noqa: PR01 + return cls.from_pandas(pandas.read_spss(path, usecols, convert_categoricals)) + + @classmethod + @_inherit_docstrings(pandas.DataFrame.to_sql, apilink="pandas.DataFrame.to_sql") + def to_sql( + cls, + qc, + name, + con, + schema=None, + if_exists="fail", + index=True, + index_label=None, + chunksize=None, + dtype=None, + method=None, + ): # noqa: PR01 + """ + Write records stored in a DataFrame to a SQL database using pandas. + + For parameters description please refer to pandas API. + """ + df = qc.to_pandas() + df.to_sql( + name=name, + con=con, + schema=schema, + if_exists=if_exists, + index=index, + index_label=index_label, + chunksize=chunksize, + dtype=dtype, + method=method, + ) + + @classmethod + @_inherit_docstrings( + pandas.DataFrame.to_pickle, apilink="pandas.DataFrame.to_pickle" + ) + def to_pickle( + cls, + obj: Any, + filepath_or_buffer, + **kwargs, + ): # noqa: PR01, D200 + """ + Pickle (serialize) object to file. + """ + if isinstance(obj, BaseQueryCompiler): + obj = obj.to_pandas() + + return pandas.to_pickle( + obj, + filepath_or_buffer=filepath_or_buffer, + **kwargs, + ) + + @classmethod + @_inherit_docstrings(pandas.DataFrame.to_csv, apilink="pandas.DataFrame.to_csv") + def to_csv(cls, obj, **kwargs): # noqa: PR01 + """ + Write object to a comma-separated values (CSV) file using pandas. + + For parameters description please refer to pandas API. + """ + if isinstance(obj, BaseQueryCompiler): + obj = obj.to_pandas() + + return obj.to_csv(**kwargs) + + @classmethod + @_inherit_docstrings( + pandas.DataFrame.to_parquet, apilink="pandas.DataFrame.to_parquet" + ) + def to_parquet(cls, obj, **kwargs): # noqa: PR01 + """ + Write object to the binary parquet format using pandas. + + For parameters description please refer to pandas API. + """ + if isinstance(obj, BaseQueryCompiler): + obj = obj.to_pandas() + + return obj.to_parquet(**kwargs) diff --git a/src/snowflake/snowpark/modin/core/execution/dispatching/factories/dispatcher.py b/src/snowflake/snowpark/modin/core/execution/dispatching/factories/dispatcher.py new file mode 100644 index 00000000000..d8180c94dae --- /dev/null +++ b/src/snowflake/snowpark/modin/core/execution/dispatching/factories/dispatcher.py @@ -0,0 +1,287 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +# Code in this file may constitute partial or total reimplementation, or modification of +# existing code originally distributed by the Modin project, under the Apache License, +# Version 2.0. + +""" +Contain IO dispatcher class. + +Dispatcher routes the work to execution-specific functions. 
+""" + +from snowflake.snowpark.modin.core.execution.dispatching.factories import factories +from snowflake.snowpark.modin.core.execution.dispatching.factories.factories import ( + PandasOnSnowflakeFactory, +) +from snowflake.snowpark.modin.utils import _inherit_docstrings + + +class FactoryNotFoundError(AttributeError): + """ + ``FactoryNotFound`` exception class. + + Raise when no matching factory could be found. + """ + + pass + + +class StubIoEngine: + """ + IO-Engine that does nothing more than raise NotImplementedError when any method is called. + + Parameters + ---------- + factory_name : str + Factory name, which will be reflected in error messages. + + Notes + ----- + Used for testing purposes. + """ + + def __init__(self, factory_name="") -> None: + self.factory_name = factory_name or "Unknown" + + def __getattr__(self, name): + """ + Return a function that raises `NotImplementedError` for the `name` method. + + Parameters + ---------- + name : str + Method name to indicate in `NotImplementedError`. + + Returns + ------- + callable + """ + + def stub(*args, **kw): + raise NotImplementedError( + f"Method {self.factory_name}.{name} is not implemented" + ) + + return stub + + +class StubFactory(factories.BaseFactory): + """ + Factory that does nothing more than raise NotImplementedError when any method is called. + + Notes + ----- + Used for testing purposes. + """ + + io_cls = StubIoEngine() + + @classmethod + def set_failing_name(cls, factory_name): + """ + Fill in `.io_cls` class attribute with ``StubIoEngine`` engine. + + Parameters + ---------- + factory_name : str + Name to pass to the ``StubIoEngine`` constructor. + """ + cls.io_cls = StubIoEngine(factory_name) + return cls + + +class FactoryDispatcher: + """ + Class that routes IO-work to the factories. + + This class is responsible for keeping selected factory up-to-date and dispatching + calls of IO-functions to its actual execution-specific implementations. 
+ """ + + __factory: factories.BaseFactory = None + + @classmethod + def get_factory(cls) -> factories.BaseFactory: + """Get current factory.""" + if cls.__factory is None: + # set default factory to be PandasOnPythonFactory, TODO: replace with Snowflake factory + # lazy initialize + cls.__factory = PandasOnSnowflakeFactory() + cls.__factory.prepare() + return cls.__factory + + @classmethod + @_inherit_docstrings(factories.BaseFactory._from_pandas) + def from_pandas(cls, df): + return cls.get_factory()._from_pandas(df) + + @classmethod + @_inherit_docstrings(factories.BaseFactory._from_arrow) + def from_arrow(cls, at): + return cls.get_factory()._from_arrow(at) + + @classmethod + @_inherit_docstrings(factories.BaseFactory._from_non_pandas) + def from_non_pandas(cls, *args, **kwargs): + return cls.get_factory()._from_non_pandas(*args, **kwargs) + + @classmethod + @_inherit_docstrings(factories.BaseFactory._from_dataframe) + def from_dataframe(cls, *args, **kwargs): + return cls.get_factory()._from_dataframe(*args, **kwargs) + + @classmethod + @_inherit_docstrings(factories.BaseFactory._read_parquet) + def read_parquet(cls, **kwargs): + return cls.get_factory()._read_parquet(**kwargs) + + @classmethod + @_inherit_docstrings(factories.BaseFactory._read_csv) + def read_csv(cls, **kwargs): + return cls.get_factory()._read_csv(**kwargs) + + # @classmethod + # #@_inherit_docstrings(factories.ExperimentalPandasOnRayFactory._read_csv_glob) + # def read_csv_glob(cls, **kwargs): + # return cls.get_factory()._read_csv_glob(**kwargs) + # + # @classmethod + # @_inherit_docstrings( + # factories.ExperimentalPandasOnRayFactory._read_pickle_distributed + # ) + # def read_pickle_distributed(cls, **kwargs): + # return cls.get_factory()._read_pickle_distributed(**kwargs) + + @classmethod + @_inherit_docstrings(factories.BaseFactory._read_json) + def read_json(cls, **kwargs): + return cls.get_factory()._read_json(**kwargs) + + @classmethod + @_inherit_docstrings(factories.BaseFactory._read_gbq) + def read_gbq(cls, **kwargs): + return cls.get_factory()._read_gbq(**kwargs) + + @classmethod + @_inherit_docstrings(factories.BaseFactory._read_html) + def read_html(cls, **kwargs): + return cls.get_factory()._read_html(**kwargs) + + @classmethod + @_inherit_docstrings(factories.BaseFactory._read_clipboard) + def read_clipboard(cls, **kwargs): + return cls.get_factory()._read_clipboard(**kwargs) + + @classmethod + @_inherit_docstrings(factories.BaseFactory._read_excel) + def read_excel(cls, **kwargs): + return cls.get_factory()._read_excel(**kwargs) + + @classmethod + @_inherit_docstrings(factories.BaseFactory._read_hdf) + def read_hdf(cls, **kwargs): + return cls.get_factory()._read_hdf(**kwargs) + + @classmethod + @_inherit_docstrings(factories.BaseFactory._read_feather) + def read_feather(cls, **kwargs): + return cls.get_factory()._read_feather(**kwargs) + + @classmethod + @_inherit_docstrings(factories.BaseFactory._read_stata) + def read_stata(cls, **kwargs): + return cls.get_factory()._read_stata(**kwargs) + + @classmethod + @_inherit_docstrings(factories.BaseFactory._read_sas) + def read_sas(cls, **kwargs): # pragma: no cover + return cls.get_factory()._read_sas(**kwargs) + + @classmethod + @_inherit_docstrings(factories.BaseFactory._read_pickle) + def read_pickle(cls, **kwargs): + return cls.get_factory()._read_pickle(**kwargs) + + @classmethod + @_inherit_docstrings(factories.BaseFactory._read_sql) + def read_sql(cls, **kwargs): + return cls.get_factory()._read_sql(**kwargs) + + @classmethod + 
@_inherit_docstrings(factories.BaseFactory._read_fwf) + def read_fwf(cls, **kwargs): + return cls.get_factory()._read_fwf(**kwargs) + + @classmethod + @_inherit_docstrings(factories.BaseFactory._read_sql_table) + def read_sql_table(cls, **kwargs): + return cls.get_factory()._read_sql_table(**kwargs) + + @classmethod + @_inherit_docstrings(factories.BaseFactory._read_sql_query) + def read_sql_query(cls, **kwargs): + return cls.get_factory()._read_sql_query(**kwargs) + + @classmethod + @_inherit_docstrings(factories.BaseFactory._read_spss) + def read_spss(cls, **kwargs): + return cls.get_factory()._read_spss(**kwargs) + + @classmethod + @_inherit_docstrings(factories.BaseFactory._to_sql) + def to_sql(cls, *args, **kwargs): + return cls.get_factory()._to_sql(*args, **kwargs) + + @classmethod + @_inherit_docstrings(factories.BaseFactory._to_pickle) + def to_pickle(cls, *args, **kwargs): + return cls.get_factory()._to_pickle(*args, **kwargs) + + # @classmethod + # @_inherit_docstrings( + # factories.ExperimentalPandasOnRayFactory._to_pickle_distributed + # ) + # def to_pickle_distributed(cls, *args, **kwargs): + # return cls.get_factory()._to_pickle_distributed(*args, **kwargs) + + # @classmethod + # @_inherit_docstrings(factories.ExperimentalPandasOnRayFactory._read_custom_text) + # def read_custom_text(cls, **kwargs): + # return cls.get_factory()._read_custom_text(**kwargs) + + @classmethod + @_inherit_docstrings(factories.BaseFactory._to_csv) + def to_csv(cls, *args, **kwargs): + return cls.get_factory()._to_csv(*args, **kwargs) + + @classmethod + @_inherit_docstrings(factories.BaseFactory._to_parquet) + def to_parquet(cls, *args, **kwargs): + return cls.get_factory()._to_parquet(*args, **kwargs) + + # Snowflake added methods + @classmethod + @_inherit_docstrings(factories.BaseFactory._read_snowflake) + def read_snowflake(cls, *args, **kwargs): + return cls.get_factory()._read_snowflake(*args, **kwargs) + + @classmethod + @_inherit_docstrings(factories.BaseFactory._to_snowflake) + def to_snowflake(cls, *args, **kwargs): + return cls.get_factory()._to_snowflake(*args, **kwargs) diff --git a/src/snowflake/snowpark/modin/core/execution/dispatching/factories/factories.py b/src/snowflake/snowpark/modin/core/execution/dispatching/factories/factories.py new file mode 100644 index 00000000000..72c2847c9ff --- /dev/null +++ b/src/snowflake/snowpark/modin/core/execution/dispatching/factories/factories.py @@ -0,0 +1,466 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +# Code in this file may constitute partial or total reimplementation, or modification of +# existing code originally distributed by the Modin project, under the Apache License, +# Version 2.0. 
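For orientation, a minimal usage sketch of how the dispatcher above and the factories defined below are expected to interact. It is illustrative only, not part of the patch; the exact arguments accepted by PandasOnSnowflakeIO.read_snowflake are defined elsewhere in this change, and the table name shown is hypothetical.

    # Illustrative sketch: the first IO call lazily instantiates
    # PandasOnSnowflakeFactory, whose prepare() binds io_cls to
    # PandasOnSnowflakeIO; the dispatcher then forwards every
    # read_*/to_* call to that IO class.
    from snowflake.snowpark.modin.core.execution.dispatching.factories.dispatcher import (
        FactoryDispatcher,
    )

    qc = FactoryDispatcher.read_snowflake("MY_TABLE")  # hypothetical table name
    # -> PandasOnSnowflakeFactory._read_snowflake -> PandasOnSnowflakeIO.read_snowflake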
+ +""" +Module contains Factories for all of the supported Modin executions. + +Factory is a bridge between calls of IO function from high-level API and its +actual implementation in the execution, bound to that factory. Each execution is represented +with a Factory class. +""" + +import re +import typing + +import pandas +from pandas.util._decorators import doc + +from snowflake.snowpark.modin.core.execution.dispatching.factories.baseio import BaseIO +from snowflake.snowpark.modin.plugin.io.snow_io import PandasOnSnowflakeIO + +_doc_abstract_factory_class = """ +Abstract {role} factory which allows to override the IO module easily. + +This class is responsible for dispatching calls of IO-functions to its +actual execution-specific implementations. + +Attributes +---------- +io_cls : BaseIO + IO module class of the underlying execution. The place to dispatch calls to. +""" + +_doc_factory_class = """ +Factory of {execution_name} execution. + +This class is responsible for dispatching calls of IO-functions to its +actual execution-specific implementations. + +Attributes +---------- +io_cls : {execution_name}IO + IO module class of the underlying execution. The place to dispatch calls to. +""" + +_doc_factory_prepare_method = """ +Initialize Factory. + +Fills in `.io_cls` class attribute with {io_module_name} lazily. +""" + +_doc_io_method_raw_template = """ +Build query compiler from {source}. + +Parameters +---------- +{params} + +Returns +------- +QueryCompiler + Query compiler of the selected storage format. +""" + +_doc_io_method_template = ( + _doc_io_method_raw_template + + """ +See Also +-------- +modin.pandas.{method} +""" +) + +_doc_io_method_all_params = """*args : args + Arguments to pass to the QueryCompiler builder method. +**kwargs : kwargs + Arguments to pass to the QueryCompiler builder method.""" + +_doc_io_method_kwargs_params = """**kwargs : kwargs + Arguments to pass to the QueryCompiler builder method.""" + + +types_dictionary = {"pandas": {"category": pandas.CategoricalDtype}} + + +class FactoryInfo(typing.NamedTuple): + """ + Structure that stores information about factory. + + Parameters + ---------- + engine : str + Name of underlying execution engine. + partition : str + Name of the partition format. + experimental : bool + Whether underlying engine is experimental-only. + """ + + engine: str + partition: str + experimental: bool + + +class NotRealFactory(Exception): + """ + ``NotRealFactory`` exception class. + + Raise when no matching factory could be found. + """ + + pass + + +@doc(_doc_abstract_factory_class, role="") +class BaseFactory: + io_cls: type[BaseIO] = None # The module where the I/O functionality exists. + + @classmethod + def get_info(cls) -> FactoryInfo: + """ + Get information about current factory. + + Notes + ----- + It parses factory name, so it must be conformant with how ``FactoryDispatcher`` + class constructs factory names. 
+ """ + try: + experimental, partition, engine = re.match( + r"^(Experimental)?(.*)On(.*)Factory$", cls.__name__ + ).groups() + except AttributeError: + raise NotRealFactory() + return FactoryInfo( + engine=engine, partition=partition, experimental=bool(experimental) + ) + + @classmethod + @doc( + _doc_factory_prepare_method, + io_module_name="an underlying execution's IO-module", + ) + def prepare(cls): + raise NotImplementedError("Subclasses of BaseFactory must implement prepare") + + @classmethod + @doc( + _doc_io_method_template, + source="pandas DataFrame", + params="df : pandas.DataFrame", + method="utils.from_pandas", + ) + def _from_pandas(cls, df): + return cls.io_cls.from_pandas(df) + + @classmethod + @doc( + _doc_io_method_template, + source="Arrow Table", + params="at : pyarrow.Table", + method="utils.from_arrow", + ) + def _from_arrow(cls, at): + return cls.io_cls.from_arrow(at) + + @classmethod + @doc( + _doc_io_method_template, + source="a non-pandas object (dict, list, np.array etc...)", + params=_doc_io_method_all_params, + method="utils.from_non_pandas", + ) + def _from_non_pandas(cls, *args, **kwargs): + return cls.io_cls.from_non_pandas(*args, **kwargs) + + @classmethod + @doc( + _doc_io_method_template, + source="a DataFrame object supporting exchange protocol `__dataframe__()`", + params=_doc_io_method_all_params, + method="utils.from_dataframe", + ) + def _from_dataframe(cls, *args, **kwargs): + return cls.io_cls.from_dataframe(*args, **kwargs) + + @classmethod + @doc( + _doc_io_method_template, + source="a Parquet file", + params=_doc_io_method_kwargs_params, + method="read_parquet", + ) + def _read_parquet(cls, **kwargs): + return cls.io_cls.read_parquet(**kwargs) + + @classmethod + @doc( + _doc_io_method_template, + source="a CSV file", + params=_doc_io_method_kwargs_params, + method="read_csv", + ) + def _read_csv(cls, **kwargs): + return cls.io_cls.read_csv(**kwargs) + + @classmethod + @doc( + _doc_io_method_template, + source="a JSON file", + params=_doc_io_method_kwargs_params, + method="read_json", + ) + def _read_json(cls, **kwargs): + return cls.io_cls.read_json(**kwargs) + + @classmethod + @doc( + _doc_io_method_template, + source="a Google BigQuery", + params=_doc_io_method_kwargs_params, + method="read_gbq", + ) + def _read_gbq(cls, **kwargs): + return cls.io_cls.read_gbq(**kwargs) + + @classmethod + @doc( + _doc_io_method_template, + source="an HTML document", + params=_doc_io_method_kwargs_params, + method="read_html", + ) + def _read_html(cls, **kwargs): + return cls.io_cls.read_html(**kwargs) + + @classmethod + @doc( + _doc_io_method_template, + source="clipboard", + params=_doc_io_method_kwargs_params, + method="read_clipboard", + ) + def _read_clipboard(cls, **kwargs): # pragma: no cover + return cls.io_cls.read_clipboard(**kwargs) + + @classmethod + @doc( + _doc_io_method_template, + source="an Excel file", + params=_doc_io_method_kwargs_params, + method="read_excel", + ) + def _read_excel(cls, **kwargs): + return cls.io_cls.read_excel(**kwargs) + + @classmethod + @doc( + _doc_io_method_template, + source="an HDFStore", + params=_doc_io_method_kwargs_params, + method="read_hdf", + ) + def _read_hdf(cls, **kwargs): + return cls.io_cls.read_hdf(**kwargs) + + @classmethod + @doc( + _doc_io_method_template, + source="a feather-format object", + params=_doc_io_method_kwargs_params, + method="read_feather", + ) + def _read_feather(cls, **kwargs): + return cls.io_cls.read_feather(**kwargs) + + @classmethod + @doc( + _doc_io_method_template, + source="a 
Stata file", + params=_doc_io_method_kwargs_params, + method="read_stata", + ) + def _read_stata(cls, **kwargs): + return cls.io_cls.read_stata(**kwargs) + + @classmethod + @doc( + _doc_io_method_template, + source="a SAS file", + params=_doc_io_method_kwargs_params, + method="read_sas", + ) + def _read_sas(cls, **kwargs): # pragma: no cover + return cls.io_cls.read_sas(**kwargs) + + @classmethod + @doc( + _doc_io_method_template, + source="a pickled Modin or pandas DataFrame", + params=_doc_io_method_kwargs_params, + method="read_pickle", + ) + def _read_pickle(cls, **kwargs): + return cls.io_cls.read_pickle(**kwargs) + + @classmethod + @doc( + _doc_io_method_template, + source="a SQL query or database table", + params=_doc_io_method_kwargs_params, + method="read_sql", + ) + def _read_sql(cls, **kwargs): + return cls.io_cls.read_sql(**kwargs) + + @classmethod + @doc( + _doc_io_method_template, + source="a table of fixed-width formatted lines", + params=_doc_io_method_kwargs_params, + method="read_fwf", + ) + def _read_fwf(cls, **kwargs): + return cls.io_cls.read_fwf(**kwargs) + + @classmethod + @doc( + _doc_io_method_template, + source="a SQL database table", + params=_doc_io_method_kwargs_params, + method="read_sql_table", + ) + def _read_sql_table(cls, **kwargs): + return cls.io_cls.read_sql_table(**kwargs) + + @classmethod + @doc( + _doc_io_method_template, + source="a SQL query", + params=_doc_io_method_kwargs_params, + method="read_sql_query", + ) + def _read_sql_query(cls, **kwargs): + return cls.io_cls.read_sql_query(**kwargs) + + @classmethod + @doc( + _doc_io_method_template, + source="an SPSS file", + params=_doc_io_method_kwargs_params, + method="read_spss", + ) + def _read_spss(cls, **kwargs): + return cls.io_cls.read_spss(**kwargs) + + @classmethod + def _to_sql(cls, *args, **kwargs): + """ + Write query compiler content to a SQL database. + + Parameters + ---------- + *args : args + Arguments to the writer method. + **kwargs : kwargs + Arguments to the writer method. + """ + return cls.io_cls.to_sql(*args, **kwargs) + + @classmethod + def _to_pickle(cls, *args, **kwargs): + """ + Pickle query compiler object. + + Parameters + ---------- + *args : args + Arguments to the writer method. + **kwargs : kwargs + Arguments to the writer method. + """ + return cls.io_cls.to_pickle(*args, **kwargs) + + @classmethod + def _to_csv(cls, *args, **kwargs): + """ + Write query compiler content to a CSV file. + + Parameters + ---------- + *args : args + Arguments to pass to the writer method. + **kwargs : kwargs + Arguments to pass to the writer method. + """ + return cls.io_cls.to_csv(*args, **kwargs) + + @classmethod + def _to_parquet(cls, *args, **kwargs): + """ + Write query compiler content to a parquet file. + + Parameters + ---------- + *args : args + Arguments to pass to the writer method. + **kwargs : kwargs + Arguments to pass to the writer method. + """ + return cls.io_cls.to_parquet(*args, **kwargs) + + # following are snowflake specific functions, could also basically subclass BaseFactory into SnowflakeFactory an + # add the methods there. 
+ @classmethod + @doc( + _doc_io_method_template, + source="read from Snowflake table", + params=_doc_io_method_kwargs_params, + method="read_snowflake", + ) + def _read_snowflake(cls, *args, **kwargs): + return cls.io_cls.read_snowflake(*args, **kwargs) + + @classmethod + @doc( + _doc_io_method_template, + source="save to Snowflake table", + params=_doc_io_method_kwargs_params, + method="to_snowflake", + ) + def _to_snowflake(cls, *args, **kwargs): + return cls.io_cls.to_snowflake(*args, **kwargs) + + @classmethod + def _to_local(cls, *args, **kwargs): + return cls.io_cls.to_local(*args, **kwargs) + + @classmethod + def _to_remote(cls, *args, **kwargs): + return cls.io_cls.to_remote(*args, **kwargs) + + +@doc(_doc_factory_class, backend_name="PandasOnSnowflake", execution_name="Snowflake") +class PandasOnSnowflakeFactory(BaseFactory): + @classmethod + @doc(_doc_factory_prepare_method, io_module_name="``PandasOnSnowflakeIO``") + def prepare(cls): + cls.io_cls = PandasOnSnowflakeIO diff --git a/src/snowflake/snowpark/modin/core/execution/dispatching/factories/pandasframe.py b/src/snowflake/snowpark/modin/core/execution/dispatching/factories/pandasframe.py new file mode 100644 index 00000000000..d3fce3a8fb2 --- /dev/null +++ b/src/snowflake/snowpark/modin/core/execution/dispatching/factories/pandasframe.py @@ -0,0 +1,3491 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +# Code in this file may constitute partial or total reimplementation, or modification of +# existing code originally distributed by the Modin project, under the Apache License, +# Version 2.0. + +""" +Module contains class PandasDataframe. + +PandasDataframe is a parent abstract class for any dataframe class +for pandas storage format. 
+""" +import datetime +from collections import OrderedDict +from collections.abc import Hashable +from enum import Enum +from typing import Callable, Optional, Union + +import numpy as np +import pandas +from pandas._libs.lib import no_default +from pandas._typing import npt +from pandas.api.types import is_object_dtype +from pandas.core.dtypes.cast import find_common_type +from pandas.core.dtypes.common import is_list_like, is_numeric_dtype +from pandas.core.indexes.api import Index, RangeIndex, ensure_index + +from snowflake.snowpark.modin.pandas.indexing import is_range_like +from snowflake.snowpark.modin.pandas.utils import ( + check_both_not_none, + is_full_grab_slice, +) +from snowflake.snowpark.modin.plugin.utils.error_message import ErrorMessage +from snowflake.snowpark.modin.utils import ( + MODIN_UNNAMED_SERIES_LABEL, # pragma: no cover +) + + +# from https://github.com/modin-project/modin/blob/master/modin/core/dataframe/base/dataframe/utils.py +class Axis(Enum): # noqa: PR01 + """ + An enum that represents the `axis` argument provided to the algebra operators. + The enum has 3 values - ROW_WISE to represent the row axis, COL_WISE to represent the + column axis, and CELL_WISE to represent no axis. ROW_WISE operations iterate over the rows + COL_WISE operations over the columns, and CELL_WISE operations over any of the partitioning + schemes that are supported in Modin (row-wise, column-wise, or block-wise). + """ + + ROW_WISE = 0 + COL_WISE = 1 + CELL_WISE = None + + +class JoinType(Enum): # noqa: PR01 + """ + An enum that represents the `join_type` argument provided to the algebra operators. + The enum has 4 values - INNER to represent inner joins, LEFT to represent left joins, RIGHT to + represent right joins, and OUTER to represent outer joins. + """ + + INNER = "inner" + LEFT = "left" + RIGHT = "right" + OUTER = "outer" + + +# from modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler +# from modin.core.storage_formats.pandas.utils import get_length_list +# from modin.error_message import ErrorMessage +# from modin.core.storage_formats.pandas.parsers import ( +# find_common_type_cat as find_common_type, +# ) +# from modin.core.dataframe.base.dataframe.dataframe import ModinDataframe +# from modin.core.dataframe.base.dataframe.utils import ( +# Axis, +# JoinType, +# ) +# from modin.core.dataframe.pandas.dataframe.utils import build_sort_functions +# +# if TYPE_CHECKING: +# from modin.core.dataframe.base.interchange.dataframe_protocol.dataframe import ( +# ProtocolDataframe, +# ) +# from pandas._typing import npt +# +# from modin.pandas.indexing import is_range_like +# from modin.pandas.utils import is_full_grab_slice, check_both_not_none +# from modin.logging import ClassLogger +# from modin.utils import MODIN_UNNAMED_SERIES_LABEL + + +def lazy_metadata_decorator(apply_axis=None, axis_arg=-1, transpose=False): + """ + Lazily propagate metadata for the ``PandasDataframe``. + + This decorator first adds the minimum required reindexing operations + to each partition's queue of functions to be lazily applied for + each PandasDataframe in the arguments by applying the function + run_f_on_minimally_updated_metadata. The decorator also sets the + flags for deferred metadata synchronization on the function result + if necessary. + + Parameters + ---------- + apply_axis : str, default: None + The axes on which to apply the reindexing operations to the `self._partitions` lazily. + Case None: No lazy metadata propagation. 
+ Case "both": Add reindexing operations on both axes to partition queue. + Case "opposite": Add reindexing operations complementary to given axis. + Case "rows": Add reindexing operations on row axis to partition queue. + axis_arg : int, default: -1 + The index or column axis. + transpose : bool, default: False + Boolean for if a transpose operation is being used. + + Returns + ------- + Wrapped Function. + """ + + def decorator(f): + from functools import wraps + + @wraps(f) + def run_f_on_minimally_updated_metadata(self, *args, **kwargs): + for obj in ( + [self] + + [o for o in args if isinstance(o, PandasDataframe)] + + [v for v in kwargs.values() if isinstance(v, PandasDataframe)] + + [ + d + for o in args + if isinstance(o, list) + for d in o + if isinstance(d, PandasDataframe) + ] + + [ + d + for _, o in kwargs.items() + if isinstance(o, list) + for d in o + if isinstance(d, PandasDataframe) + ] + ): + if apply_axis == "both": + if obj._deferred_index and obj._deferred_column: + obj._propagate_index_objs(axis=None) + elif obj._deferred_index: + obj._propagate_index_objs(axis=0) + elif obj._deferred_column: + obj._propagate_index_objs(axis=1) + elif apply_axis == "opposite": + if "axis" not in kwargs: + axis = args[axis_arg] + else: + axis = kwargs["axis"] + if axis == 0 and obj._deferred_column: + obj._propagate_index_objs(axis=1) + elif axis == 1 and obj._deferred_index: + obj._propagate_index_objs(axis=0) + elif apply_axis == "rows": + obj._propagate_index_objs(axis=0) + result = f(self, *args, **kwargs) + if apply_axis is None and not transpose: + result._deferred_index = self._deferred_index + result._deferred_column = self._deferred_column + elif apply_axis is None and transpose: + result._deferred_index = self._deferred_column + result._deferred_column = self._deferred_index + elif apply_axis == "opposite": + if axis == 0: + result._deferred_index = self._deferred_index + else: + result._deferred_column = self._deferred_column + elif apply_axis == "rows": + result._deferred_column = self._deferred_column + return result + + return run_f_on_minimally_updated_metadata + + return decorator + + +class PandasDataframe: + """ + An abstract class that represents the parent class for any pandas storage format dataframe class. + + This class provides interfaces to run operations on dataframe partitions. + + Parameters + ---------- + partitions : np.ndarray + A 2D NumPy array of partitions. + index : sequence, optional + The index for the dataframe. Converted to a ``pandas.Index``. + Is computed from partitions on demand if not specified. + columns : sequence, optional + The columns object for the dataframe. Converted to a ``pandas.Index``. + Is computed from partitions on demand if not specified. + row_lengths : list, optional + The length of each partition in the rows. The "height" of + each of the block partitions. Is computed if not provided. + column_widths : list, optional + The width of each partition in the columns. The "width" of + each of the block partitions. Is computed if not provided. + dtypes : pandas.Series, optional + The data types for the dataframe columns. + """ + + _partition_mgr_cls = None + _query_compiler_cls = None # PandasQueryCompiler + # These properties flag whether or not we are deferring the metadata synchronization + _deferred_index = False + _deferred_column = False + + @pandas.util.cache_readonly + def __constructor__(self): + """ + Create a new instance of this object. 
+ + Returns + ------- + PandasDataframe + """ + return type(self) + + def __init__( + self, + partitions, + index=None, + columns=None, + row_lengths=None, + column_widths=None, + dtypes=None, + ) -> None: + self._partitions = partitions + self._index_cache = ensure_index(index) if index is not None else None + self._columns_cache = ensure_index(columns) if columns is not None else None + self._row_lengths_cache = row_lengths + self._column_widths_cache = column_widths + self._dtypes = dtypes + + self._validate_axes_lengths() + self._filter_empties(compute_metadata=False) + + def _validate_axes_lengths(self): + """Validate that labels are split correctly if split is known.""" + pass + # if self._row_lengths_cache is not None and len(self.index) > 0: + # # An empty frame can have 0 rows but a nonempty index. If the frame + # # does have rows, the number of rows must equal the size of the + # # index. + # num_rows = sum(self._row_lengths_cache) + # if num_rows > 0: + # ErrorMessage.catch_bugs_and_request_email( + # num_rows != len(self._index_cache), + # f"Row lengths: {num_rows} != {len(self._index_cache)}", + # ) + # ErrorMessage.catch_bugs_and_request_email( + # any(val < 0 for val in self._row_lengths_cache), + # f"Row lengths cannot be negative: {self._row_lengths_cache}", + # ) + # if self._column_widths_cache is not None and len(self.columns) > 0: + # # An empty frame can have 0 column but a nonempty column index. If + # # the frame does have columns, the number of columns must equal the + # # size of the columns. + # num_columns = sum(self._column_widths_cache) + # if num_columns > 0: + # ErrorMessage.catch_bugs_and_request_email( + # num_columns != len(self._columns_cache), + # f"Column widths: {num_columns} != {len(self._columns_cache)}", + # ) + # ErrorMessage.catch_bugs_and_request_email( + # any(val < 0 for val in self._column_widths_cache), + # f"Column widths cannot be negative: {self._column_widths_cache}", + # ) + + @property + def row_lengths(self): + """ + Compute the row partitions lengths if they are not cached. + + Returns + ------- + list + A list of row partitions lengths. + """ + if self._row_lengths_cache is None: + if len(self._partitions.T) > 0: + row_parts = self._partitions.T[0] + self._row_lengths_cache = [part.length() for part in row_parts] + else: + self._row_lengths_cache = [] + return self._row_lengths_cache + + @property + def column_widths(self): + """ + Compute the column partitions widths if they are not cached. + + Returns + ------- + list + A list of column partitions widths. + """ + if self._column_widths_cache is None: + if len(self._partitions) > 0: + col_parts = self._partitions[0] + self._column_widths_cache = [part.width() for part in col_parts] + else: + self._column_widths_cache = [] + return self._column_widths_cache + + @property + def _axes_lengths(self): + """ + Get a pair of row partitions lengths and column partitions widths. + + Returns + ------- + list + The pair of row partitions lengths and column partitions widths. + """ + return [self.row_lengths, self.column_widths] + + @property + def dtypes(self): + """ + Compute the data types if they are not cached. + + Returns + ------- + pandas.Series + A pandas Series containing the data types for this dataframe. + """ + if self._dtypes is None: + self._dtypes = self._compute_dtypes() + return self._dtypes + + def _compute_dtypes(self): + """ + Compute the data types via TreeReduce pattern. 
+ + Returns + ------- + pandas.Series + A pandas Series containing the data types for this dataframe. + """ + + def dtype_builder(df): + return df.apply(lambda col: find_common_type(col.values), axis=0) + + # For now we will use a pandas Series for the dtypes. + if len(self.columns) > 0: + dtypes = ( + self.tree_reduce(0, lambda df: df.dtypes, dtype_builder) + .to_pandas() + .iloc[0] + ) + else: + dtypes = pandas.Series([]) + # reset name to None because we use MODIN_UNNAMED_SERIES_LABEL internally + dtypes.name = None + return dtypes + + _index_cache = None + _columns_cache = None + + def _validate_set_axis(self, new_labels, old_labels): + """ + Validate the possibility of replacement of old labels with the new labels. + + Parameters + ---------- + new_labels : list-like + The labels to replace with. + old_labels : list-like + The labels to replace. + + Returns + ------- + list-like + The validated labels. + """ + new_labels = ensure_index(new_labels) + old_len = len(old_labels) + new_len = len(new_labels) + if old_len != new_len: + raise ValueError( + f"Length mismatch: Expected axis has {old_len} elements, " + + f"new values have {new_len} elements" + ) + return new_labels + + def _get_index(self): + """ + Get the index from the cache object. + + Returns + ------- + pandas.Index + An index object containing the row labels. + """ + if self._index_cache is None: + self._index_cache, row_lengths = self._compute_axis_labels_and_lengths(0) + if self._row_lengths_cache is None: + self._row_lengths_cache = row_lengths + return self._index_cache + + def _get_columns(self): + """ + Get the columns from the cache object. + + Returns + ------- + pandas.Index + An index object containing the column labels. + """ + if self._columns_cache is None: + self._columns_cache, column_widths = self._compute_axis_labels_and_lengths( + 1 + ) + if self._column_widths_cache is None: + self._column_widths_cache = column_widths + return self._columns_cache + + def _set_index(self, new_index): + """ + Replace the current row labels with new labels. + + Parameters + ---------- + new_index : list-like + The new row labels. + """ + if self._index_cache is None: + self._index_cache = ensure_index(new_index) + else: + new_index = self._validate_set_axis(new_index, self._index_cache) + self._index_cache = new_index + self.synchronize_labels(axis=0) + + def _set_columns(self, new_columns): + """ + Replace the current column labels with new labels. + + Parameters + ---------- + new_columns : list-like + The new column labels. + """ + if self._columns_cache is None: + self._columns_cache = ensure_index(new_columns) + else: + new_columns = self._validate_set_axis(new_columns, self._columns_cache) + self._columns_cache = new_columns + if self._dtypes is not None: + self._dtypes.index = new_columns + self.synchronize_labels(axis=1) + + columns = property(_get_columns, _set_columns) + index = property(_get_index, _set_index) + + @property + def axes(self): + """ + Get index and columns that can be accessed with an `axis` integer. + + Returns + ------- + list + List with two values: index and columns. + """ + return [self.index, self.columns] + + def _compute_axis_labels_and_lengths(self, axis: int, partitions=None): + """ + Compute the labels for specific `axis`. + + Parameters + ---------- + axis : int + Axis to compute labels along. + partitions : np.ndarray, optional + A 2D NumPy array of partitions from which labels will be grabbed. + If not specified, partitions will be taken from `self._partitions`. 
+ + Returns + ------- + pandas.Index + Labels for the specified `axis`. + List of int + Size of partitions alongside specified `axis`. + """ + if partitions is None: + partitions = self._partitions + new_index, internal_idx = self._partition_mgr_cls.get_indices(axis, partitions) + return new_index, list(map(len, internal_idx)) + + def _filter_empties(self, compute_metadata=True): + """ + Remove empty partitions from `self._partitions` to avoid triggering excess computation. + + Parameters + ---------- + compute_metadata : bool, default: True + Trigger the computations for partition sizes and labels if they're not done already. + """ + if not compute_metadata and ( + self._index_cache is None + or self._columns_cache is None + or self._row_lengths_cache is None + or self._column_widths_cache is None + ): + # do not trigger the computations + return + + if len(self.axes[0]) == 0 or len(self.axes[1]) == 0: + # This is the case for an empty frame. We don't want to completely remove + # all metadata and partitions so for the moment, we won't prune if the frame + # is empty. + # TODO: Handle empty dataframes better + return + self._partitions = np.array( + [ + [ + self._partitions[i][j] + for j in range(len(self._partitions[i])) + if j < len(self.column_widths) and self.column_widths[j] != 0 + ] + for i in range(len(self._partitions)) + if i < len(self.row_lengths) and self.row_lengths[i] != 0 + ] + ) + self._column_widths_cache = [w for w in self.column_widths if w != 0] + self._row_lengths_cache = [r for r in self.row_lengths if r != 0] + + def synchronize_labels(self, axis=None): + """ + Set the deferred axes variables for the ``PandasDataframe``. + + Parameters + ---------- + axis : int, default: None + The deferred axis. + 0 for the index, 1 for the columns. + """ + if axis is None: + self._deferred_index = True + self._deferred_column = True + elif axis == 0: + self._deferred_index = True + else: + self._deferred_column = True + + def _propagate_index_objs(self, axis=None): + """ + Synchronize labels by applying the index object for specific `axis` to the `self._partitions` lazily. + + Adds `set_axis` function to call-queue of each partition from `self._partitions` + to apply new axis. + + Parameters + ---------- + axis : int, default: None + The axis to apply to. If it's None applies to both axes. 
+ """ + self._filter_empties() + if axis is None or axis == 0: + cum_row_lengths = np.cumsum([0] + self.row_lengths) + if axis is None or axis == 1: + cum_col_widths = np.cumsum([0] + self.column_widths) + + if axis is None: + + def apply_idx_objs(df, idx, cols): + return df.set_axis(idx, axis="index").set_axis(cols, axis="columns") + + self._partitions = np.array( + [ + [ + self._partitions[i][j].add_to_apply_calls( + apply_idx_objs, + idx=self.index[ + slice(cum_row_lengths[i], cum_row_lengths[i + 1]) + ], + cols=self.columns[ + slice(cum_col_widths[j], cum_col_widths[j + 1]) + ], + length=self.row_lengths[i], + width=self.column_widths[j], + ) + for j in range(len(self._partitions[i])) + ] + for i in range(len(self._partitions)) + ] + ) + self._deferred_index = False + self._deferred_column = False + elif axis == 0: + + def apply_idx_objs(df, idx): + return df.set_axis(idx, axis="index") + + self._partitions = np.array( + [ + [ + self._partitions[i][j].add_to_apply_calls( + apply_idx_objs, + idx=self.index[ + slice(cum_row_lengths[i], cum_row_lengths[i + 1]) + ], + length=self.row_lengths[i], + width=self.column_widths[j], + ) + for j in range(len(self._partitions[i])) + ] + for i in range(len(self._partitions)) + ] + ) + self._deferred_index = False + elif axis == 1: + + def apply_idx_objs(df, cols): + return df.set_axis(cols, axis="columns") + + self._partitions = np.array( + [ + [ + self._partitions[i][j].add_to_apply_calls( + apply_idx_objs, + cols=self.columns[ + slice(cum_col_widths[j], cum_col_widths[j + 1]) + ], + length=self.row_lengths[i], + width=self.column_widths[j], + ) + for j in range(len(self._partitions[i])) + ] + for i in range(len(self._partitions)) + ] + ) + self._deferred_column = False + else: + ErrorMessage.internal_error(axis is not None and axis not in [0, 1]) + + @lazy_metadata_decorator(apply_axis=None) + def take_2d_labels_or_positional( + self, + row_labels: Optional[list[Hashable]] = None, + row_positions: Optional[list[int]] = None, + col_labels: Optional[list[Hashable]] = None, + col_positions: Optional[list[int]] = None, + ) -> "PandasDataframe": + """ + Lazily select columns or rows from given indices. + + Parameters + ---------- + row_labels : list of hashable, optional + The row labels to extract. + row_positions : list-like of ints, optional + The row positions to extract. + col_labels : list of hashable, optional + The column labels to extract. + col_positions : list-like of ints, optional + The column positions to extract. + + Returns + ------- + PandasDataframe + A new PandasDataframe from the mask provided. + + Notes + ----- + If both `row_labels` and `row_positions` are provided, a ValueError is raised. + The same rule applies for `col_labels` and `col_positions`. + """ + if check_both_not_none(row_labels, row_positions): + raise ValueError( + "Both row_labels and row_positions were provided - " + + "please provide only one of row_labels and row_positions." + ) + if check_both_not_none(col_labels, col_positions): + raise ValueError( + "Both col_labels and col_positions were provided - " + + "please provide only one of col_labels and col_positions." 
+ ) + + if row_labels is not None: + # Get numpy array of positions of values from `row_labels` + if isinstance(self.index, pandas.MultiIndex): + row_positions = np.zeros(len(row_labels), dtype="int64") + # we can't use .get_locs(row_labels) because the function + # requires a different format for row_labels + for idx, label in enumerate(row_labels): + if isinstance(label, str): + label = [label] + # get_loc can return slice that _take_2d_positional can't handle + row_positions[idx] = self.index.get_locs(label)[0] + else: + row_positions = self.index.get_indexer_for(row_labels) + + if col_labels is not None: + # Get numpy array of positions of values from `col_labels` + if isinstance(self.columns, pandas.MultiIndex): + col_positions = np.zeros(len(col_labels), dtype="int64") + # we can't use .get_locs(col_labels) because the function + # requires a different format for row_labels + for idx, label in enumerate(col_labels): + if isinstance(label, str): + label = [label] + # get_loc can return slice that _take_2d_positional can't handle + col_positions[idx] = self.columns.get_locs(label)[0] + else: + col_positions = self.columns.get_indexer_for(col_labels) + + return self._take_2d_positional(row_positions, col_positions) + + def _get_sorted_positions(self, positions): + """ + Sort positions if necessary. + + Parameters + ---------- + positions : Sequence[int] + + Returns + ------- + Sequence[int] + """ + # Helper for take_2d_positional + if is_range_like(positions) and positions.step > 0: + sorted_positions = positions + else: + sorted_positions = np.sort(positions) + return sorted_positions + + def _get_new_lengths(self, partitions_dict, *, axis: int) -> list[int]: + """ + Find lengths of new partitions. + + Parameters + ---------- + partitions_dict : dict + axis : int + + Returns + ------- + list[int] + """ + # Helper for take_2d_positional + if axis == 0: + axis_lengths = self.row_lengths + else: + axis_lengths = self.column_widths + + new_lengths = [ + len( + # Row lengths for slice are calculated as the length of the slice + # on the partition. Often this will be the same length as the current + # length, but sometimes it is different, thus the extra calculation. + range(*part_indexer.indices(axis_lengths[part_idx])) + if isinstance(part_indexer, slice) + else part_indexer + ) + for part_idx, part_indexer in partitions_dict.items() + ] + return new_lengths + + def _get_new_index_obj( + self, positions, sorted_positions, axis: int + ) -> "tuple[pandas.Index, slice | npt.NDArray[np.intp]]": + """ + Find the new Index object for take_2d_positional result. + + Parameters + ---------- + positions : Sequence[int] + sorted_positions : Sequence[int] + axis : int + + Returns + ------- + pandas.Index + slice or Sequence[int] + """ + # Helper for take_2d_positional + # Use the slice to calculate the new columns + if axis == 0: + idx = self.index + else: + idx = self.columns + + # TODO: Support fast processing of negative-step ranges + if is_range_like(positions) and positions.step > 0: + # pandas Index is more likely to preserve its metadata if the indexer + # is slice + monotonic_idx = slice(positions.start, positions.stop, positions.step) + else: + monotonic_idx = np.asarray(sorted_positions, dtype=np.intp) + + new_idx = idx[monotonic_idx] + return new_idx, monotonic_idx + + def _take_2d_positional( + self, + row_positions: Optional[list[int]] = None, + col_positions: Optional[list[int]] = None, + ) -> "PandasDataframe": + """ + Lazily select columns or rows from given indices. 
+ + Parameters + ---------- + row_positions : list-like of ints, optional + The row positions to extract. + col_positions : list-like of ints, optional + The column positions to extract. + + Returns + ------- + PandasDataframe + A new PandasDataframe from the mask provided. + """ + indexers = [] + for axis, indexer in enumerate((row_positions, col_positions)): + if is_range_like(indexer): + if indexer.step == 1 and len(indexer) == len(self.axes[axis]): + # By this function semantics, `None` indexer is a full-axis access + indexer = None + elif indexer is not None and not isinstance(indexer, pandas.RangeIndex): + # Pure python's range is not fully compatible with a list of ints, + # converting it to ``pandas.RangeIndex``` that is compatible. + indexer = pandas.RangeIndex( + indexer.start, indexer.stop, indexer.step + ) + else: + ErrorMessage.internal_error( + failure_condition=not (indexer is None or is_list_like(indexer)), + extra_log="Mask takes only list-like numeric indexers, " + + f"received: {type(indexer)}", + ) + indexers.append(indexer) + row_positions, col_positions = indexers + + if col_positions is None and row_positions is None: + return self.copy() + + sorted_row_positions = sorted_col_positions = None + + if row_positions is not None: + sorted_row_positions = self._get_sorted_positions(row_positions) + # Get dict of row_parts as {row_index: row_internal_indices} + row_partitions_dict = self._get_dict_of_block_index( + 0, sorted_row_positions, are_indices_sorted=True + ) + new_row_lengths = self._get_new_lengths(row_partitions_dict, axis=0) + new_index, _ = self._get_new_index_obj( + row_positions, sorted_row_positions, axis=0 + ) + else: + row_partitions_dict = {i: slice(None) for i in range(len(self._partitions))} + new_row_lengths = self._row_lengths_cache + new_index = self._index_cache + + if col_positions is not None: + sorted_col_positions = self._get_sorted_positions(col_positions) + # Get dict of col_parts as {col_index: col_internal_indices} + col_partitions_dict = self._get_dict_of_block_index( + 1, sorted_col_positions, are_indices_sorted=True + ) + new_col_widths = self._get_new_lengths(col_partitions_dict, axis=1) + new_columns, monotonic_col_idx = self._get_new_index_obj( + col_positions, sorted_col_positions, axis=1 + ) + + ErrorMessage.internal_error( + failure_condition=sum(new_col_widths) != len(new_columns), + extra_log=f"{sum(new_col_widths)} != {len(new_columns)}.\n" + + f"{col_positions}\n{self.column_widths}\n{col_partitions_dict}", + ) + + if self._dtypes is not None: + new_dtypes = self.dtypes.iloc[monotonic_col_idx] + else: + new_dtypes = None + else: + col_partitions_dict = { + i: slice(None) for i in range(len(self._partitions.T)) + } + new_col_widths = self._column_widths_cache + new_columns = self._columns_cache + new_dtypes = self._dtypes + + new_partitions = np.array( + [ + [ + self._partitions[row_idx][col_idx].mask( + row_internal_indices, col_internal_indices + ) + for col_idx, col_internal_indices in col_partitions_dict.items() + ] + for row_idx, row_internal_indices in row_partitions_dict.items() + ] + ) + intermediate = self.__constructor__( + new_partitions, + new_index, + new_columns, + new_row_lengths, + new_col_widths, + new_dtypes, + ) + + return self._maybe_reorder_labels( + intermediate, + row_positions, + sorted_row_positions, + col_positions, + sorted_col_positions, + ) + + def _maybe_reorder_labels( + self, + intermediate: "PandasDataframe", + row_positions, + sorted_row_positions, + col_positions, + sorted_col_positions, + ) -> 
"PandasDataframe": + """ + Call re-order labels on take_2d_labels_or_positional result if necessary. + + Parameters + ---------- + intermediate : PandasDataFrame + row_positions : list-like of ints, optional + The row positions to extract. + sorted_row_positions : list-like of ints, optional + Sorted version of row_positions. + col_positions : list-like of ints, optional + The column positions to extract. + sorted_col_positions : list-like of ints, optional + Sorted version of col_positions. + + Returns + ------- + PandasDataframe + """ + # Check if monotonically increasing, return if it is. Fast track code path for + # common case to keep it fast. + if ( + row_positions is None + # Fast range processing of non-positive-step ranges is not yet supported + or (is_range_like(row_positions) and row_positions.step > 0) + or len(row_positions) == 1 + or np.all(row_positions[1:] >= row_positions[:-1]) + ) and ( + col_positions is None + # Fast range processing of non-positive-step ranges is not yet supported + or (is_range_like(col_positions) and col_positions.step > 0) + or len(col_positions) == 1 + or np.all(col_positions[1:] >= col_positions[:-1]) + ): + return intermediate + + # The new labels are often smaller than the old labels, so we can't reuse the + # original order values because those were mapped to the original data. We have + # to reorder here based on the expected order from within the data. + # We create a dictionary mapping the position of the numeric index with respect + # to all others, then recreate that order by mapping the new order values from + # the old. This information is sent to `_reorder_labels`. + if row_positions is not None: + row_order_mapping = dict( + zip(sorted_row_positions, range(len(row_positions))) + ) + new_row_order = [row_order_mapping[idx] for idx in row_positions] + else: + new_row_order = None + if col_positions is not None: + col_order_mapping = dict( + zip(sorted_col_positions, range(len(col_positions))) + ) + new_col_order = [col_order_mapping[idx] for idx in col_positions] + else: + new_col_order = None + return intermediate._reorder_labels( + row_positions=new_row_order, col_positions=new_col_order + ) + + @lazy_metadata_decorator(apply_axis="rows") + def from_labels(self) -> "PandasDataframe": + """ + Convert the row labels to a column of data, inserted at the first position. + + Gives result by similar way as `pandas.DataFrame.reset_index`. Each level + of `self.index` will be added as separate column of data. + + Returns + ------- + PandasDataframe + A PandasDataframe with new columns from index labels. + """ + new_row_labels = pandas.RangeIndex(len(self.index)) + if self.index.nlevels > 1: + level_names = [ + self.index.names[i] if self.index.names[i] is not None else f"level_{i}" + for i in range(self.index.nlevels) + ] + else: + level_names = [ + self.index.names[0] + if self.index.names[0] is not None + else "index" + if "index" not in self.columns + else f"level_{0}" + ] + new_dtypes = None + if self._dtypes is not None: + names = tuple(level_names) if len(level_names) > 1 else level_names[0] + new_dtypes = self.index.to_frame(name=names).dtypes + new_dtypes = pandas.concat([new_dtypes, self._dtypes]) + + # We will also use the `new_column_names` in the calculation of the internal metadata, so this is a + # lightweight way of ensuring the metadata matches. + if self.columns.nlevels > 1: + # Column labels are different for multilevel index. 
+ new_column_names = pandas.MultiIndex.from_tuples( + # Set level names on the 1st columns level and fill up empty level names with empty string. + # Expand tuples in level names. This is how reset_index works when col_level col_fill are not specified. + [ + tuple( + list(level) + [""] * (self.columns.nlevels - len(level)) + if isinstance(level, tuple) + else [level] + [""] * (self.columns.nlevels - 1) + ) + for level in level_names + ], + names=self.columns.names, + ) + else: + new_column_names = pandas.Index(level_names, tupleize_cols=False) + new_columns = new_column_names.append(self.columns) + + def from_labels_executor(df, **kwargs): + # Setting the names here ensures that external and internal metadata always match. + df.index.names = new_column_names + + # Handling of a case when columns have the same name as one of index levels names. + # In this case `df.reset_index` provides errors related to columns duplication. + # This case is possible because columns metadata updating is deferred. To workaround + # `df.reset_index` error we allow columns duplication in "if" branch via `concat`. + if any(name_level in df.columns for name_level in df.index.names): + columns_to_add = df.index.to_frame() + columns_to_add.reset_index(drop=True, inplace=True) + df = df.reset_index(drop=True) + result = pandas.concat([columns_to_add, df], axis=1, copy=False) + else: + result = df.reset_index() + # Put the index back to the original due to GH#4394 + result.index = df.index + return result + + new_parts = self._partition_mgr_cls.apply_func_to_select_indices( + 0, + self._partitions, + from_labels_executor, + [0], + keep_remaining=True, + ) + new_column_widths = [ + self.index.nlevels + self.column_widths[0] + ] + self.column_widths[1:] + result = self.__constructor__( + new_parts, + new_row_labels, + new_columns, + row_lengths=self._row_lengths_cache, + column_widths=new_column_widths, + dtypes=new_dtypes, + ) + # Set flag for propagating deferred row labels across dataframe partitions + result.synchronize_labels(axis=0) + return result + + def to_labels(self, column_list: list[Hashable]) -> "PandasDataframe": + """ + Move one or more columns into the row labels. Previous labels are dropped. + + Parameters + ---------- + column_list : list of hashable + The list of column names to place as the new row labels. + + Returns + ------- + PandasDataframe + A new PandasDataframe that has the updated labels. + """ + extracted_columns = self.take_2d_labels_or_positional( + col_labels=column_list + ).to_pandas() + + if len(column_list) == 1: + new_labels = pandas.Index( + extracted_columns.squeeze(axis=1), name=column_list[0] + ) + else: + new_labels = pandas.MultiIndex.from_frame( + extracted_columns, names=column_list + ) + result = self.take_2d_labels_or_positional( + col_labels=[i for i in self.columns if i not in extracted_columns.columns] + ) + result.index = new_labels + return result + + @lazy_metadata_decorator(apply_axis="both") + def _reorder_labels(self, row_positions=None, col_positions=None): + """ + Reorder the column and or rows in this DataFrame. + + Parameters + ---------- + row_positions : list of int, optional + The ordered list of new row orders such that each position within the list + indicates the new position. + col_positions : list of int, optional + The ordered list of new column orders such that each position within the + list indicates the new position. + + Returns + ------- + PandasDataframe + A new PandasDataframe with reordered columns and/or rows. 
+ """ + new_dtypes = self._dtypes + if row_positions is not None: + ordered_rows = self._partition_mgr_cls.map_axis_partitions( + 0, self._partitions, lambda df: df.iloc[row_positions] + ) + row_idx = self.index[row_positions] + + if self._partitions.shape[0] != ordered_rows.shape[0] or len( + row_idx + ) != len(self.index): + # The frame was re-partitioned along the 0 axis during reordering using + # the "standard" partitioning. Knowing the standard partitioning scheme + # we are able to compute new row lengths. + new_lengths = get_length_list( # noqa: F821 + axis_len=len(row_idx), num_splits=ordered_rows.shape[0] + ) + else: + # If the frame's partitioning was preserved then + # we can use previous row lengths cache + new_lengths = self._row_lengths_cache + else: + ordered_rows = self._partitions + row_idx = self.index + new_lengths = self._row_lengths_cache + if col_positions is not None: + ordered_cols = self._partition_mgr_cls.map_axis_partitions( + 1, ordered_rows, lambda df: df.iloc[:, col_positions] + ) + col_idx = self.columns[col_positions] + if new_dtypes is not None: + new_dtypes = self._dtypes.iloc[col_positions] + + if self._partitions.shape[1] != ordered_cols.shape[1] or len( + col_idx + ) != len(self.columns): + # The frame was re-partitioned along the 1 axis during reordering using + # the "standard" partitioning. Knowing the standard partitioning scheme + # we are able to compute new column widths. + new_widths = get_length_list( # noqa: F821 + axis_len=len(col_idx), num_splits=ordered_cols.shape[1] + ) + else: + # If the frame's partitioning was preserved then + # we can use previous column widths cache + new_widths = self._column_widths_cache + else: + ordered_cols = ordered_rows + col_idx = self.columns + new_widths = self._column_widths_cache + return self.__constructor__( + ordered_cols, row_idx, col_idx, new_lengths, new_widths, new_dtypes + ) + + @lazy_metadata_decorator(apply_axis=None) + def copy(self): + """ + Copy this object. + + Returns + ------- + PandasDataframe + A copied version of this object. + """ + return self.__constructor__( + self._partitions, + self._index_cache.copy() if self._index_cache is not None else None, + self._columns_cache.copy() if self._columns_cache is not None else None, + self._row_lengths_cache, + self._column_widths_cache, + self._dtypes.copy() if self._dtypes is not None else None, + ) + + @lazy_metadata_decorator(apply_axis="both") + def astype(self, col_dtypes): + """ + Convert the columns dtypes to given dtypes. + + Parameters + ---------- + col_dtypes : dictionary of {col: dtype,...} + Where col is the column name and dtype is a NumPy dtype. + + Returns + ------- + BaseDataFrame + Dataframe with updated dtypes. 
+ """ + columns = col_dtypes.keys() + # Create Series for the updated dtypes + new_dtypes = self.dtypes.copy() + for _, column in enumerate(columns): + dtype = col_dtypes[column] + if ( + not isinstance(dtype, type(self.dtypes[column])) + or dtype != self.dtypes[column] + ): + # Update the new dtype series to the proper pandas dtype + try: + new_dtype = np.dtype(dtype) + except TypeError: + new_dtype = dtype + + if dtype != np.int32 and new_dtype == np.int32: + new_dtypes[column] = np.dtype("int64") + elif dtype != np.float32 and new_dtype == np.float32: + new_dtypes[column] = np.dtype("float64") + # We cannot infer without computing the dtype if + elif isinstance(new_dtype, str) and new_dtype == "category": + new_dtypes = None + break + else: + new_dtypes[column] = new_dtype + + def astype_builder(df): + """Compute new partition frame with dtypes updated.""" + return df.astype({k: v for k, v in col_dtypes.items() if k in df}) + + new_frame = self._partition_mgr_cls.map_partitions( + self._partitions, astype_builder + ) + return self.__constructor__( + new_frame, + self._index_cache, + self._columns_cache, + self._row_lengths_cache, + self._column_widths_cache, + new_dtypes, + ) + + # Metadata modification methods + def add_prefix(self, prefix, axis): + """ + Add a prefix to the current row or column labels. + + Parameters + ---------- + prefix : str + The prefix to add. + axis : int + The axis to update. + + Returns + ------- + PandasDataframe + A new dataframe with the updated labels. + """ + + def new_labels_mapper(x, prefix=str(prefix)): # noqa B008 + return prefix + str(x) + + if axis == 0: + return self.rename(new_row_labels=new_labels_mapper) + return self.rename(new_col_labels=new_labels_mapper) + + def add_suffix(self, suffix, axis): + """ + Add a suffix to the current row or column labels. + + Parameters + ---------- + suffix : str + The suffix to add. + axis : int + The axis to update. + + Returns + ------- + PandasDataframe + A new dataframe with the updated labels. + """ + + def new_labels_mapper(x, suffix=str(suffix)): # noqa B008 + return str(x) + suffix + + if axis == 0: + return self.rename(new_row_labels=new_labels_mapper) + return self.rename(new_col_labels=new_labels_mapper) + + # END Metadata modification methods + + def numeric_columns(self, include_bool=True): + """ + Return the names of numeric columns in the frame. + + Parameters + ---------- + include_bool : bool, default: True + Whether to consider boolean columns as numeric. + + Returns + ------- + list + List of column names. + """ + columns = [] + for col, dtype in zip(self.columns, self.dtypes): + if is_numeric_dtype(dtype) and ( + include_bool or (not include_bool and dtype != np.bool_) + ): + columns.append(col) + return columns + + def _get_dict_of_block_index(self, axis, indices, are_indices_sorted=False): + """ + Convert indices to an ordered dict mapping partition (or block) index to internal indices in said partition. + + Parameters + ---------- + axis : {0, 1} + The axis along which to get the indices (0 - rows, 1 - columns). + indices : list of int, slice + A list of global indices to convert. + are_indices_sorted : bool, default: False + Flag indicating whether the `indices` sequence is sorted by ascending or not. + Note: the internal algorithm requires for the `indices` to be sorted, this + flag is used for optimization in order to not sort already sorted data. 
+ Be careful when passing ``True`` for this flag, if the data appears to be unsorted + with the flag set to ``True`` this would lead to undefined behavior. + + Returns + ------- + OrderedDict + A mapping from partition index to list of internal indices which correspond to `indices` in each + partition. + """ + # TODO: Support handling of slices with specified 'step'. For now, converting them into a range + if isinstance(indices, slice) and ( + indices.step is not None and indices.step != 1 + ): + indices = range(*indices.indices(len(self.axes[axis]))) + # Fasttrack slices + if isinstance(indices, slice) or (is_range_like(indices) and indices.step == 1): + # Converting range-like indexer to slice + indices = slice(indices.start, indices.stop, indices.step) + if is_full_grab_slice(indices, sequence_len=len(self.axes[axis])): + return OrderedDict( + zip( + range(self._partitions.shape[axis]), + [slice(None)] * self._partitions.shape[axis], + ) + ) + # Empty selection case + if indices.start == indices.stop and indices.start is not None: + return OrderedDict() + if indices.start is None or indices.start == 0: + last_part, last_idx = list( + self._get_dict_of_block_index(axis, [indices.stop]).items() + )[0] + dict_of_slices = OrderedDict( + zip(range(last_part), [slice(None)] * last_part) + ) + dict_of_slices.update({last_part: slice(last_idx[0])}) + return dict_of_slices + elif indices.stop is None or indices.stop >= len(self.axes[axis]): + first_part, first_idx = list( + self._get_dict_of_block_index(axis, [indices.start]).items() + )[0] + dict_of_slices = OrderedDict({first_part: slice(first_idx[0], None)}) + num_partitions = np.size(self._partitions, axis=axis) + part_list = range(first_part + 1, num_partitions) + dict_of_slices.update( + OrderedDict(zip(part_list, [slice(None)] * len(part_list))) + ) + return dict_of_slices + else: + first_part, first_idx = list( + self._get_dict_of_block_index(axis, [indices.start]).items() + )[0] + last_part, last_idx = list( + self._get_dict_of_block_index(axis, [indices.stop]).items() + )[0] + if first_part == last_part: + return OrderedDict({first_part: slice(first_idx[0], last_idx[0])}) + else: + if last_part - first_part == 1: + return OrderedDict( + # FIXME: this dictionary creation feels wrong - it might not maintain the order + { + first_part: slice(first_idx[0], None), + last_part: slice(None, last_idx[0]), + } + ) + else: + dict_of_slices = OrderedDict( + {first_part: slice(first_idx[0], None)} + ) + part_list = range(first_part + 1, last_part) + dict_of_slices.update( + OrderedDict(zip(part_list, [slice(None)] * len(part_list))) + ) + dict_of_slices.update({last_part: slice(None, last_idx[0])}) + return dict_of_slices + if isinstance(indices, list): + # Converting python list to numpy for faster processing + indices = np.array(indices, dtype=np.int64) + # Fasttrack empty numpy array + if isinstance(indices, np.ndarray) and indices.size == 0: + # This will help preserve metadata stored in empty dataframes (indexes and dtypes) + # Otherwise, we will get an empty `new_partitions` array, from which it will + # no longer be possible to obtain metadata + return OrderedDict([(0, np.array([], dtype=np.int64))]) + negative_mask = np.less(indices, 0) + has_negative = np.any(negative_mask) + if has_negative: + # We're going to modify 'indices' inplace in a numpy way, so doing a copy/converting indices to numpy. 
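+            # Hedged example (added for clarity, not in the original comment): for an
+            # axis of length 10, the modulo below maps a negative position such as -1
+            # to 9 (-1 % 10 == 9), so the binning step only ever sees non-negative
+            # positions.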
+ indices = ( + indices.copy() + if isinstance(indices, np.ndarray) + else np.array(indices, dtype=np.int64) + ) + indices[negative_mask] = indices[negative_mask] % len(self.axes[axis]) + # If the `indices` array was modified because of the negative indices conversion + # then the original order was broken and so we have to sort anyway: + if has_negative or not are_indices_sorted: + indices = np.sort(indices) + if axis == 0: + bins = np.array(self.row_lengths) + else: + bins = np.array(self.column_widths) + # INT_MAX to make sure we don't try to compute on partitions that don't exist. + cumulative = np.append(bins[:-1].cumsum(), np.iinfo(bins.dtype).max) + + def internal(block_idx: int, global_index): + """Transform global index to internal one for given block (identified by its index).""" + return ( + global_index + if not block_idx + else np.subtract( + global_index, cumulative[min(block_idx, len(cumulative) - 1) - 1] + ) + ) + + partition_ids = np.digitize(indices, cumulative) + count_for_each_partition = np.array( + [(partition_ids == i).sum() for i in range(len(cumulative))] + ).cumsum() + # Compute the internal indices and pair those with the partition index. + # If the first partition has any values we need to return, compute those + # first to make the list comprehension easier. Otherwise, just append the + # rest of the values to an empty list. + if count_for_each_partition[0] > 0: + first_partition_indices = [ + (0, internal(0, indices[slice(count_for_each_partition[0])])) + ] + else: + first_partition_indices = [] + partition_ids_with_indices = first_partition_indices + [ + ( + i, + internal( + i, + indices[ + slice( + count_for_each_partition[i - 1], + count_for_each_partition[i], + ) + ], + ), + ) + for i in range(1, len(count_for_each_partition)) + if count_for_each_partition[i] > count_for_each_partition[i - 1] + ] + return OrderedDict(partition_ids_with_indices) + + @staticmethod + def _join_index_objects(axis, indexes, how, sort): + """ + Join the pair of index objects (columns or rows) by a given strategy. + + Unlike Index.join() in pandas, if `axis` is 1, `sort` is False, + and `how` is "outer", the result will _not_ be sorted. + + Parameters + ---------- + axis : {0, 1} + The axis index object to join (0 - rows, 1 - columns). + indexes : list(Index) + The indexes to join on. + how : {'left', 'right', 'inner', 'outer', None} + The type of join to join to make. If `None` then joined index + considered to be the first index in the `indexes` list. + sort : boolean + Whether or not to sort the joined index. + + Returns + ------- + (Index, func) + Joined index with make_reindexer func. 
+ """ + assert isinstance(indexes, list) + + # define helper functions + def merge(left_index, right_index): + """Combine a pair of indices depending on `axis`, `how` and `sort` from outside.""" + if axis == 1 and how == "outer" and not sort: + return left_index.union(right_index, sort=False) + else: + return left_index.join(right_index, how=how, sort=sort) + + # define condition for joining indexes + all_indices_equal = all(indexes[0].equals(index) for index in indexes[1:]) + do_join_index = how is not None and not all_indices_equal + + # define condition for joining indexes with getting indexers + need_indexers = ( + axis == 0 + and not all_indices_equal + and any(not index.is_unique for index in indexes) + ) + indexers = None + + # perform joining indexes + if do_join_index: + if len(indexes) == 2 and need_indexers: + # in case of count of indexes > 2 we should perform joining all indexes + # after that get indexers + # in the fast path we can obtain joined_index and indexers in one call + indexers = [None, None] + joined_index, indexers[0], indexers[1] = indexes[0].join( + indexes[1], how=how, sort=sort, return_indexers=True + ) + else: + joined_index = indexes[0] + # TODO: revisit for performance + for index in indexes[1:]: + joined_index = merge(joined_index, index) + else: + joined_index = indexes[0].copy() + + if need_indexers and indexers is None: + indexers = [index.get_indexer_for(joined_index) for index in indexes] + + def make_reindexer(do_reindex: bool, frame_idx: int): + """Create callback that reindexes the dataframe using newly computed index.""" + # the order of the frames must match the order of the indexes + if not do_reindex: + return lambda df: df + + if need_indexers: + assert indexers is not None + + return lambda df: df._reindex_with_indexers( + {0: [joined_index, indexers[frame_idx]]}, + copy=True, + allow_dups=True, + ) + return lambda df: df.reindex(joined_index, axis=axis) + + return joined_index, make_reindexer + + # Internal methods + # These methods are for building the correct answer in a modular way. + # Please be careful when changing these! + + def _build_treereduce_func(self, axis, func): + """ + Properly formats a TreeReduce result so that the partitioning is correct. + + Parameters + ---------- + axis : int + The axis along which to apply the function. + func : callable + The function to apply. + + Returns + ------- + callable + A function to be shipped to the partitions to be executed. + + Notes + ----- + This should be used for any TreeReduce style operation that results in a + reduced data dimensionality (dataframe -> series). + """ + + def _tree_reduce_func(df, *args, **kwargs): + """Tree-reducer function itself executing `func`, presenting the resulting pandas.Series as pandas.DataFrame.""" + series_result = func(df, *args, **kwargs) + if axis == 0 and isinstance(series_result, pandas.Series): + # In the case of axis=0, we need to keep the shape of the data + # consistent with what we have done. In the case of a reduce, the + # data for axis=0 should be a single value for each column. By + # transposing the data after we convert to a DataFrame, we ensure that + # the columns of the result line up with the columns from the data. + # axis=1 does not have this requirement because the index already will + # line up with the index of the data based on how pandas creates a + # DataFrame from a Series. 
+ result = pandas.DataFrame(series_result).T + result.index = [MODIN_UNNAMED_SERIES_LABEL] + else: + result = pandas.DataFrame(series_result) + if isinstance(series_result, pandas.Series): + result.columns = [MODIN_UNNAMED_SERIES_LABEL] + return result + + return _tree_reduce_func + + def _compute_tree_reduce_metadata(self, axis, new_parts): + """ + Compute the metadata for the result of reduce function. + + Parameters + ---------- + axis : int + The axis on which reduce function was applied. + new_parts : NumPy 2D array + Partitions with the result of applied function. + + Returns + ------- + PandasDataframe + Modin series (1xN frame) containing the reduced data. + """ + new_axes, new_axes_lengths = [0, 0], [0, 0] + + new_axes[axis] = [MODIN_UNNAMED_SERIES_LABEL] + new_axes[axis ^ 1] = self.axes[axis ^ 1] + + new_axes_lengths[axis] = [1] + new_axes_lengths[axis ^ 1] = self._axes_lengths[axis ^ 1] + + new_dtypes = None + result = self.__constructor__( + new_parts, + *new_axes, + *new_axes_lengths, + new_dtypes, + ) + return result + + @lazy_metadata_decorator(apply_axis="both") + def reduce( + self, + axis: Union[int, Axis], + function: Callable, + dtypes: Optional[str] = None, + ) -> "PandasDataframe": + """ + Perform a user-defined aggregation on the specified axis, where the axis reduces down to a singleton. Requires knowledge of the full axis for the reduction. + + Parameters + ---------- + axis : int or modin.core.dataframe.base.utils.Axis + The axis to perform the reduce over. + function : callable(row|col) -> single value + The reduce function to apply to each column. + dtypes : str, optional + The data types for the result. This is an optimization + because there are functions that always result in a particular data + type, and this allows us to avoid (re)computing it. + + Returns + ------- + PandasDataframe + Modin series (1xN frame) containing the reduced data. + + Notes + ----- + The user-defined function must reduce to a single value. + """ + axis = Axis(axis) + function = self._build_treereduce_func(axis.value, function) + new_parts = self._partition_mgr_cls.map_axis_partitions( + axis.value, self._partitions, function + ) + return self._compute_tree_reduce_metadata(axis.value, new_parts) + + @lazy_metadata_decorator(apply_axis="opposite", axis_arg=0) + def tree_reduce( + self, + axis: Union[int, Axis], + map_func: Callable, + reduce_func: Optional[Callable] = None, + dtypes: Optional[str] = None, + ) -> "PandasDataframe": + """ + Apply function that will reduce the data to a pandas Series. + + Parameters + ---------- + axis : int or modin.core.dataframe.base.utils.Axis + The axis to perform the tree reduce over. + map_func : callable(row|col) -> row|col + Callable function to map the dataframe. + reduce_func : callable(row|col) -> single value, optional + Callable function to reduce the dataframe. + If none, then apply map_func twice. + dtypes : str, optional + The data types for the result. This is an optimization + because there are functions that always result in a particular data + type, and this allows us to avoid (re)computing it. + + Returns + ------- + PandasDataframe + A new dataframe. 
+ """ + axis = Axis(axis) + map_func = self._build_treereduce_func(axis.value, map_func) + if reduce_func is None: + reduce_func = map_func + else: + reduce_func = self._build_treereduce_func(axis.value, reduce_func) + + map_parts = self._partition_mgr_cls.map_partitions(self._partitions, map_func) + reduce_parts = self._partition_mgr_cls.map_axis_partitions( + axis.value, map_parts, reduce_func + ) + return self._compute_tree_reduce_metadata(axis.value, reduce_parts) + + @lazy_metadata_decorator(apply_axis=None) + def map(self, func: Callable, dtypes: Optional[str] = None) -> "PandasDataframe": + """ + Perform a function that maps across the entire dataset. + + Parameters + ---------- + func : callable(row|col|cell) -> row|col|cell + The function to apply. + dtypes : dtypes of the result, optional + The data types for the result. This is an optimization + because there are functions that always result in a particular data + type, and this allows us to avoid (re)computing it. + + Returns + ------- + PandasDataframe + A new dataframe. + """ + new_partitions = self._partition_mgr_cls.map_partitions(self._partitions, func) + if dtypes == "copy": + dtypes = self._dtypes + elif dtypes is not None: + dtypes = pandas.Series( + [np.dtype(dtypes)] * len(self.columns), index=self.columns + ) + return self.__constructor__( + new_partitions, + self._index_cache, + self._columns_cache, + self._row_lengths_cache, + self._column_widths_cache, + dtypes=dtypes, + ) + + def window( + self, + axis: Union[int, Axis], + reduce_fn: Callable, + window_size: int, + result_schema: Optional[dict[Hashable, type]] = None, + ) -> "PandasDataframe": + """ + Apply a sliding window operator that acts as a GROUPBY on each window, and reduces down to a single row (column) per window. + + Parameters + ---------- + axis : int or modin.core.dataframe.base.utils.Axis + The axis to slide over. + reduce_fn : callable(rowgroup|colgroup) -> row|col + The reduce function to apply over the data. + window_size : int + The number of row/columns to pass to the function. + (The size of the sliding window). + result_schema : dict, optional + Mapping from column labels to data types that represents the types of the output dataframe. + + Returns + ------- + PandasDataframe + A new PandasDataframe with the reduce function applied over windows of the specified + axis. + + Notes + ----- + The user-defined reduce function must reduce each window’s column + (row if axis=1) down to a single value. + """ + pass + + @lazy_metadata_decorator(apply_axis="both") + def fold(self, axis, func): + """ + Perform a function across an entire axis. + + Parameters + ---------- + axis : int + The axis to apply over. + func : callable + The function to apply. + + Returns + ------- + PandasDataframe + A new dataframe. + + Notes + ----- + The data shape is not changed (length and width of the table). + """ + new_partitions = self._partition_mgr_cls.map_axis_partitions( + axis, self._partitions, func, keep_partitioning=True + ) + return self.__constructor__( + new_partitions, + self._index_cache, + self._columns_cache, + self._row_lengths_cache, + self._column_widths_cache, + ) + + def infer_objects(self) -> "PandasDataframe": + """ + Attempt to infer better dtypes for object columns. + + Attempts soft conversion of object-dtyped columns, leaving non-object and unconvertible + columns unchanged. The inference rules are the same as during normal Series/DataFrame + construction. 
+ + Returns + ------- + PandasDataframe + A new PandasDataframe with the inferred schema. + """ + obj_cols = [ + col for col, dtype in enumerate(self.dtypes) if is_object_dtype(dtype) + ] + return self.infer_types(obj_cols) + + def infer_types(self, col_labels: list[str]) -> "PandasDataframe": + """ + Determine the compatible type shared by all values in the specified columns, and coerce them to that type. + + Parameters + ---------- + col_labels : list + List of column labels to infer and induce types over. + + Returns + ------- + PandasDataframe + A new PandasDataframe with the inferred schema. + """ + # Compute dtypes on the specified columns, and then set those dtypes on a new frame + new_cols = self.take_2d_labels_or_positional(col_labels=col_labels) + new_cols_dtypes = new_cols.tree_reduce(0, pandas.DataFrame.infer_objects).dtypes + new_dtypes = self.dtypes.copy() + new_dtypes[col_labels] = new_cols_dtypes + return self.__constructor__( + self._partitions, + self._index_cache, + self._columns_cache, + self._row_lengths_cache, + self._column_widths_cache, + new_dtypes, + ) + + def join( + self, + axis: Union[int, Axis], + condition: Callable, + other, # other: ModinDataframe, + join_type: Union[str, JoinType], + ) -> "PandasDataframe": + """ + Join this dataframe with the other. + + Parameters + ---------- + axis : int or modin.core.dataframe.base.utils.Axis + The axis to perform the join on. + condition : callable + Function that determines which rows should be joined. The condition can be a + simple equality, e.g. "left.col1 == right.col1" or can be arbitrarily complex. + other : ModinDataframe + The other data to join with, i.e. the right dataframe. + join_type : string {"inner", "left", "right", "outer"} or modin.core.dataframe.base.utils.JoinType + The type of join to perform. + + Returns + ------- + PandasDataframe + A new PandasDataframe that is the result of applying the specified join over the two + dataframes. + + Notes + ----- + During the join, this dataframe is considered the left, while the other is + treated as the right. + + Only inner joins, left outer, right outer, and full outer joins are currently supported. + Support for other join types (e.g. natural join) may be implemented in the future. + """ + pass + + def rename( + self, + new_row_labels: Optional[Union[dict[Hashable, Hashable], Callable]] = None, + new_col_labels: Optional[Union[dict[Hashable, Hashable], Callable]] = None, + level: Optional[Union[int, list[int]]] = None, + ) -> "PandasDataframe": + """ + Replace the row and column labels with the specified new labels. + + Parameters + ---------- + new_row_labels : dictionary or callable, optional + Mapping or callable that relates old row labels to new labels. + new_col_labels : dictionary or callable, optional + Mapping or callable that relates old col labels to new labels. + level : int, optional + Level whose row labels to replace. + + Returns + ------- + PandasDataframe + A new PandasDataframe with the new row and column labels. + + Notes + ----- + If level is not specified, the default behavior is to replace row labels in all levels. 
+ """ + new_index = self.index.copy() + + def make_label_swapper(label_dict): + if isinstance(label_dict, dict): + return lambda label: label_dict.get(label, label) + return label_dict + + def swap_labels_levels(index_tuple): + if isinstance(new_row_labels, dict): + return tuple(new_row_labels.get(label, label) for label in index_tuple) + return tuple(new_row_labels(label) for label in index_tuple) + + if new_row_labels: + swap_row_labels = make_label_swapper(new_row_labels) + if isinstance(self.index, pandas.MultiIndex): + if level is not None: + new_index.set_levels( + new_index.levels[level].map(swap_row_labels), level + ) + else: + new_index = new_index.map(swap_labels_levels) + else: + new_index = new_index.map(swap_row_labels) + new_cols = self.columns.copy() + if new_col_labels: + new_cols = new_cols.map(make_label_swapper(new_col_labels)) + + def map_fn(df): + return df.rename(index=new_row_labels, columns=new_col_labels, level=level) + + new_parts = self._partition_mgr_cls.map_partitions(self._partitions, map_fn) + new_dtypes = None if self._dtypes is None else self._dtypes.set_axis(new_cols) + return self.__constructor__( + new_parts, + new_index, + new_cols, + self._row_lengths_cache, + self._column_widths_cache, + new_dtypes, + ) + + @lazy_metadata_decorator(apply_axis="both") + def sort_by( + self, + axis: Union[int, Axis], + columns: Union[str, list[str]], + ascending: bool = True, + **kwargs, + ) -> "PandasDataframe": + """ + Logically reorder rows (columns if axis=1) lexicographically by the data in a column or set of columns. + + Parameters + ---------- + axis : int or modin.core.dataframe.base.utils.Axis + The axis to perform the sort over. + columns : string or list + Column label(s) to use to determine lexicographical ordering. + ascending : boolean, default: True + Whether to sort in ascending or descending order. + **kwargs : dict + Keyword arguments to pass when sorting partitions. + + Returns + ------- + PandasDataframe + A new PandasDataframe sorted into lexicographical order by the specified column(s). + """ + if not isinstance(columns, list): + columns = [columns] + # When we do a sort on the result of Series.value_counts, we don't rename the index until + # after everything is done, which causes an error when sorting the partitions, since the + # index and the column share the same name, when in actuality, the index's name should be + # None. This fixes the indexes name beforehand in that case, so that the sort works. + + def sort_function(df): + index_renaming = None + if any(name in df.columns for name in df.index.names): + index_renaming = df.index.names + df.index = df.index.set_names([None] * len(df.index.names)) + df = df.sort_values(by=columns, ascending=ascending, **kwargs) + if index_renaming is not None: + df.index = df.index.set_names(index_renaming) + return df + + axis = Axis(axis) + if axis != Axis.ROW_WISE: + raise NotImplementedError( + f"Algebra sort only implemented row-wise. {axis.name} sort not implemented yet!" + ) + + # If this df is empty, we don't want to try and shuffle or sort. + if len(self.axes[0]) == 0 or len(self.axes[1]) == 0: + return self.copy() + # If this df only has one row partition, we don't want to do a shuffle and sort - we can + # just do a full-axis sort. + if len(self._partitions) == 1: + return self.apply_full_axis( + 1, + sort_function, + ) + if self.dtypes[columns[0]] == object: + # This means we are not sorting numbers, so we need our quantiles to not try + # arithmetic on the values. 
+ method = "inverted_cdf" + else: + method = "linear" + + shuffling_functions = build_sort_functions( # noqa F821 + self, + columns[0], + method, + ascending[0] if is_list_like(ascending) else ascending, + **kwargs, + ) + major_col_partition_index = self.columns.get_loc(columns[0]) + cols_seen = 0 + index = -1 + for i, length in enumerate(self.column_widths): + cols_seen += length + if major_col_partition_index < cols_seen: + index = i + break + new_partitions = self._partition_mgr_cls.shuffle_partitions( + self._partitions, + index, + shuffling_functions, + sort_function, + ) + new_axes = self.axes + new_lengths = [None, None] + if kwargs.get("ignore_index", False): + new_axes[axis.value] = RangeIndex(len(new_axes[axis.value])) + else: + ( + new_axes[axis.value], + new_lengths[axis.value], + ) = self._compute_axis_labels_and_lengths(axis.value, new_partitions) + + new_axes[axis.value] = new_axes[axis.value].set_names( + self.axes[axis.value].names + ) + # We perform the final steps of the sort on full axis partitions, so we know that the + # length of each partition is the full length of the dataframe. + new_lengths[axis.value ^ 1] = [len(self.columns)] + # Since the strategy to pick our pivots involves random sampling + # we could end up picking poor pivots, leading to skew in our partitions. + # We should add a fix to check if there is skew in the partitions and rebalance + # them if necessary. Calling `rebalance_partitions` won't do this, since it only + # resolves the case where there isn't the right amount of partitions - not where + # there is skew across the lengths of partitions. + new_modin_frame = self.__constructor__( + new_partitions, *new_axes, *new_lengths, self.dtypes + ) + if kwargs.get("ignore_index", False): + new_modin_frame._propagate_index_objs(axis=0) + return new_modin_frame + + @lazy_metadata_decorator(apply_axis="both") + def filter(self, axis: Union[Axis, int], condition: Callable) -> "PandasDataframe": + """ + Filter data based on the function provided along an entire axis. + + Parameters + ---------- + axis : int or modin.core.dataframe.base.utils.Axis + The axis to filter over. + condition : callable(row|col) -> bool + The function to use for the filter. This function should filter the + data itself. + + Returns + ------- + PandasDataframe + A new filtered dataframe. + """ + axis = Axis(axis) + assert axis in ( + Axis.ROW_WISE, + Axis.COL_WISE, + ), "Axis argument to filter operator must be 0 (rows) or 1 (columns)" + + new_partitions = self._partition_mgr_cls.map_axis_partitions( + axis.value, self._partitions, condition, keep_partitioning=True + ) + + new_axes, new_lengths = [0, 0], [0, 0] + + new_axes[axis.value] = ( + self._index_cache if axis.value == 0 else self._columns_cache + ) + new_lengths[axis.value] = ( + self._row_lengths_cache if axis.value == 0 else self._column_widths_cache + ) + new_axes[axis.value ^ 1], new_lengths[axis.value ^ 1] = None, None + + return self.__constructor__( + new_partitions, + *new_axes, + *new_lengths, + self._dtypes if axis == Axis.COL_WISE else None, + ) + + def filter_by_types(self, types: list[Hashable]) -> "PandasDataframe": + """ + Allow the user to specify a type or set of types by which to filter the columns. + + Parameters + ---------- + types : list + The types to filter columns by. + + Returns + ------- + PandasDataframe + A new PandasDataframe from the filter provided. 
+ """ + return self.take_2d_labels_or_positional( + col_positions=[i for i, dtype in enumerate(self.dtypes) if dtype in types] + ) + + @lazy_metadata_decorator(apply_axis="both") + def explode(self, axis: Union[int, Axis], func: Callable) -> "PandasDataframe": + """ + Explode list-like entries along an entire axis. + + Parameters + ---------- + axis : int or modin.core.dataframe.base.utils.Axis + The axis specifying how to explode. If axis=1, explode according + to columns. + func : callable + The function to use to explode a single element. + + Returns + ------- + PandasFrame + A new filtered dataframe. + """ + axis = Axis(axis) + partitions = self._partition_mgr_cls.map_axis_partitions( + axis.value, self._partitions, func, keep_partitioning=True + ) + if axis == Axis.COL_WISE: + new_index, row_lengths = self._compute_axis_labels_and_lengths( + 0, partitions + ) + new_columns, column_widths = self.columns, self._column_widths_cache + else: + new_index, row_lengths = self.index, self._row_lengths_cache + new_columns, column_widths = self._compute_axis_labels_and_lengths( + 1, partitions + ) + return self.__constructor__( + partitions, new_index, new_columns, row_lengths, column_widths + ) + + @lazy_metadata_decorator(apply_axis="both") + def apply_full_axis( + self, + axis, + func, + new_index=None, + new_columns=None, + dtypes=None, + keep_partitioning=True, + sync_labels=True, + pass_axis_lengths_to_partitions=False, + ): + """ + Perform a function across an entire axis. + + Parameters + ---------- + axis : {0, 1} + The axis to apply over (0 - rows, 1 - columns). + func : callable + The function to apply. + new_index : list-like, optional + The index of the result. We may know this in advance, + and if not provided it must be computed. + new_columns : list-like, optional + The columns of the result. We may know this in + advance, and if not provided it must be computed. + dtypes : list-like, optional + The data types of the result. This is an optimization + because there are functions that always result in a particular data + type, and allows us to avoid (re)computing it. + keep_partitioning : boolean, default: True + The flag to keep partition boundaries for Modin Frame. + Setting it to True disables shuffling data from one partition to another. + sync_labels : boolean, default: True + Synchronize external indexes (`new_index`, `new_columns`) with internal indexes. + This could be used when you're certain that the indices in partitions are equal to + the provided hints in order to save time on syncing them. + pass_axis_lengths_to_partitions : bool, default: False + Whether pass partition lengths along `axis ^ 1` to the kernel `func`. + Note that `func` must be able to obtain `df, *axis_lengths`. + + Returns + ------- + PandasDataframe + A new dataframe. + + Notes + ----- + The data shape may change as a result of the function. + """ + return self.broadcast_apply_full_axis( + axis=axis, + func=func, + new_index=new_index, + new_columns=new_columns, + dtypes=dtypes, + other=None, + keep_partitioning=keep_partitioning, + sync_labels=sync_labels, + pass_axis_lengths_to_partitions=pass_axis_lengths_to_partitions, + ) + + @lazy_metadata_decorator(apply_axis="both") + def apply_full_axis_select_indices( + self, + axis, + func, + apply_indices=None, + numeric_indices=None, + new_index=None, + new_columns=None, + keep_remaining=False, + ): + """ + Apply a function across an entire axis for a subset of the data. + + Parameters + ---------- + axis : int + The axis to apply over. 
+ func : callable + The function to apply. + apply_indices : list-like, default: None + The labels to apply over. + numeric_indices : list-like, default: None + The indices to apply over. + new_index : list-like, optional + The index of the result. We may know this in advance, + and if not provided it must be computed. + new_columns : list-like, optional + The columns of the result. We may know this in + advance, and if not provided it must be computed. + keep_remaining : boolean, default: False + Whether or not to drop the data that is not computed over. + + Returns + ------- + PandasDataframe + A new dataframe. + """ + assert apply_indices is not None or numeric_indices is not None + # Convert indices to numeric indices + old_index = self.index if axis else self.columns + if apply_indices is not None: + numeric_indices = old_index.get_indexer_for(apply_indices) + # Get the indices for the axis being applied to (it is the opposite of axis + # being applied over) + dict_indices = self._get_dict_of_block_index(axis ^ 1, numeric_indices) + new_partitions = ( + self._partition_mgr_cls.apply_func_to_select_indices_along_full_axis( + axis, + self._partitions, + func, + dict_indices, + keep_remaining=keep_remaining, + ) + ) + # TODO Infer columns and index from `keep_remaining` and `apply_indices` + if new_index is None: + new_index = self.index if axis == 1 else None + if new_columns is None: + new_columns = self.columns if axis == 0 else None + return self.__constructor__(new_partitions, new_index, new_columns, None, None) + + @lazy_metadata_decorator(apply_axis="both") + def apply_select_indices( + self, + axis, + func, + apply_indices=None, + row_labels=None, + col_labels=None, + new_index=None, + new_columns=None, + keep_remaining=False, + item_to_distribute=no_default, + ): + """ + Apply a function for a subset of the data. + + Parameters + ---------- + axis : {0, 1} + The axis to apply over. + func : callable + The function to apply. + apply_indices : list-like, default: None + The labels to apply over. Must be given if axis is provided. + row_labels : list-like, default: None + The row labels to apply over. Must be provided with + `col_labels` to apply over both axes. + col_labels : list-like, default: None + The column labels to apply over. Must be provided + with `row_labels` to apply over both axes. + new_index : list-like, optional + The index of the result. We may know this in advance, + and if not provided it must be computed. + new_columns : list-like, optional + The columns of the result. We may know this in + advance, and if not provided it must be computed. + keep_remaining : boolean, default: False + Whether or not to drop the data that is not computed over. + item_to_distribute : np.ndarray or scalar, default: no_default + The item to split up so it can be applied over both axes. + + Returns + ------- + PandasDataframe + A new dataframe. 
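+
+ Notes
+ -----
+ When ``axis`` is ``None``, ``row_labels`` and ``col_labels`` must both be
+ provided and ``keep_remaining`` must be True: the function is applied over
+ both axes at once, with ``item_to_distribute`` split across the selected
+ blocks.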
+ """ + # TODO Infer columns and index from `keep_remaining` and `apply_indices` + if new_index is None: + new_index = self.index if axis == 1 else None + if new_columns is None: + new_columns = self.columns if axis == 0 else None + if axis is not None: + assert apply_indices is not None + # Convert indices to numeric indices + old_index = self.index if axis else self.columns + numeric_indices = old_index.get_indexer_for(apply_indices) + # Get indices being applied to (opposite of indices being applied over) + dict_indices = self._get_dict_of_block_index(axis ^ 1, numeric_indices) + new_partitions = self._partition_mgr_cls.apply_func_to_select_indices( + axis, + self._partitions, + func, + dict_indices, + keep_remaining=keep_remaining, + ) + # Length objects for new object creation. This is shorter than if..else + # This object determines the lengths and widths based on the given + # parameters and builds a dictionary used in the constructor below. 0 gives + # the row lengths and 1 gives the column widths. Since the dimension of + # `axis` given may have changed, we currently just recompute it. + # TODO Determine lengths from current lengths if `keep_remaining=False` + lengths_objs = { + axis: [len(apply_indices)] + if not keep_remaining + else [self.row_lengths, self.column_widths][axis], + axis ^ 1: [self.row_lengths, self.column_widths][axis ^ 1], + } + return self.__constructor__( + new_partitions, new_index, new_columns, lengths_objs[0], lengths_objs[1] + ) + else: + # We are applying over both axes here, so make sure we have all the right + # variables set. + assert row_labels is not None and col_labels is not None + assert keep_remaining + assert item_to_distribute is not no_default + row_partitions_list = self._get_dict_of_block_index(0, row_labels).items() + col_partitions_list = self._get_dict_of_block_index(1, col_labels).items() + new_partitions = self._partition_mgr_cls.apply_func_to_indices_both_axis( + self._partitions, + func, + row_partitions_list, + col_partitions_list, + item_to_distribute, + # Passing caches instead of values in order to not trigger shapes recomputation + # if they are not used inside this function. + self._row_lengths_cache, + self._column_widths_cache, + ) + return self.__constructor__( + new_partitions, + new_index, + new_columns, + self._row_lengths_cache, + self._column_widths_cache, + ) + + @lazy_metadata_decorator(apply_axis="both") + def broadcast_apply( + self, axis, func, other, join_type="left", labels="keep", dtypes=None + ): + """ + Broadcast axis partitions of `other` to partitions of `self` and apply a function. + + Parameters + ---------- + axis : {0, 1} + Axis to broadcast over. + func : callable + Function to apply. + other : PandasDataframe + Modin DataFrame to broadcast. + join_type : str, default: "left" + Type of join to apply. + labels : {"keep", "replace", "drop"}, default: "keep" + Whether keep labels from `self` Modin DataFrame, replace them with labels + from joined DataFrame or drop altogether to make them be computed lazily later. + dtypes : "copy" or None, default: None + Whether keep old dtypes or infer new dtypes from data. + + Returns + ------- + PandasDataframe + New Modin DataFrame. + """ + # Only sort the indices if they do not match + ( + left_parts, + right_parts, + joined_index, + partition_sizes_along_axis, + ) = self._copartition( + axis, other, join_type, sort=not self.axes[axis].equals(other.axes[axis]) + ) + # unwrap list returned by `copartition`. 
+ right_parts = right_parts[0] + new_frame = self._partition_mgr_cls.broadcast_apply( + axis, func, left_parts, right_parts + ) + if dtypes == "copy": + dtypes = self._dtypes + + def _pick_axis(get_axis, sizes_cache): + if labels == "keep": + return get_axis(), sizes_cache + if labels == "replace": + return joined_index, partition_sizes_along_axis + assert labels == "drop", f"Unexpected `labels`: {labels}" + return None, None + + if axis == 0: + # Pass shape caches instead of values in order to not trigger shape computation. + new_index, new_row_lengths = _pick_axis( + self._get_index, self._row_lengths_cache + ) + new_columns, new_column_widths = self.columns, self._column_widths_cache + else: + new_index, new_row_lengths = self.index, self._row_lengths_cache + new_columns, new_column_widths = _pick_axis( + self._get_columns, self._column_widths_cache + ) + + return self.__constructor__( + new_frame, + new_index, + new_columns, + new_row_lengths, + new_column_widths, + dtypes=dtypes, + ) + + def _prepare_frame_to_broadcast(self, axis, indices, broadcast_all): + """ + Compute the indices to broadcast `self` considering `indices`. + + Parameters + ---------- + axis : {0, 1} + Axis to broadcast along. + indices : dict + Dict of indices and internal indices of partitions where `self` must + be broadcasted. + broadcast_all : bool + Whether broadcast the whole axis of `self` frame or just a subset of it. + + Returns + ------- + dict + Dictionary with indices of partitions to broadcast. + + Notes + ----- + New dictionary of indices of `self` partitions represents that + you want to broadcast `self` at specified another partition named `other`. For example, + Dictionary {key: {key1: [0, 1], key2: [5]}} means, that in `other`[key] you want to + broadcast [self[key1], self[key2]] partitions and internal indices for `self` must be [[0, 1], [5]] + """ + if broadcast_all: + sizes = self.row_lengths if axis else self.column_widths + return {key: dict(enumerate(sizes)) for key in indices.keys()} + passed_len = 0 + result_dict = {} + for part_num, internal in indices.items(): + result_dict[part_num] = self._get_dict_of_block_index( + axis ^ 1, np.arange(passed_len, passed_len + len(internal)) + ) + passed_len += len(internal) + return result_dict + + def __make_init_labels_args(self, partitions, index, columns) -> dict: + kw = {} + kw["index"], kw["row_lengths"] = ( + self._compute_axis_labels_and_lengths(0, partitions) + if index is None + else (index, None) + ) + kw["columns"], kw["column_widths"] = ( + self._compute_axis_labels_and_lengths(1, partitions) + if columns is None + else (columns, None) + ) + return kw + + @lazy_metadata_decorator(apply_axis="both") + def broadcast_apply_select_indices( + self, + axis, + func, + other, + apply_indices=None, + numeric_indices=None, + keep_remaining=False, + broadcast_all=True, + new_index=None, + new_columns=None, + ): + """ + Apply a function to select indices at specified axis and broadcast partitions of `other` Modin DataFrame. + + Parameters + ---------- + axis : {0, 1} + Axis to apply function along. + func : callable + Function to apply. + other : PandasDataframe + Partitions of which should be broadcasted. + apply_indices : list, default: None + List of labels to apply (if `numeric_indices` are not specified). + numeric_indices : list, default: None + Numeric indices to apply (if `apply_indices` are not specified). + keep_remaining : bool, default: False + Whether drop the data that is not computed over or not. 
+ broadcast_all : bool, default: True + Whether broadcast the whole axis of right frame to every + partition or just a subset of it. + new_index : pandas.Index, optional + Index of the result. We may know this in advance, + and if not provided it must be computed. + new_columns : pandas.Index, optional + Columns of the result. We may know this in advance, + and if not provided it must be computed. + + Returns + ------- + PandasDataframe + New Modin DataFrame. + """ + assert ( + apply_indices is not None or numeric_indices is not None + ), "Indices to apply must be specified!" + + if other is None: + if apply_indices is None: + apply_indices = self.axes[axis][numeric_indices] + return self.apply_select_indices( + axis=axis, + func=func, + apply_indices=apply_indices, + keep_remaining=keep_remaining, + new_index=new_index, + new_columns=new_columns, + ) + + if numeric_indices is None: + old_index = self.index if axis else self.columns + numeric_indices = old_index.get_indexer_for(apply_indices) + + dict_indices = self._get_dict_of_block_index(axis ^ 1, numeric_indices) + broadcasted_dict = other._prepare_frame_to_broadcast( + axis, dict_indices, broadcast_all=broadcast_all + ) + new_partitions = self._partition_mgr_cls.broadcast_apply_select_indices( + axis, + func, + self._partitions, + other._partitions, + dict_indices, + broadcasted_dict, + keep_remaining, + ) + + kw = self.__make_init_labels_args(new_partitions, new_index, new_columns) + return self.__constructor__(new_partitions, **kw) + + @lazy_metadata_decorator(apply_axis="both") + def broadcast_apply_full_axis( + self, + axis, + func, + other, + new_index=None, + new_columns=None, + apply_indices=None, + enumerate_partitions=False, + dtypes=None, + keep_partitioning=True, + sync_labels=True, + pass_axis_lengths_to_partitions=False, + ): + """ + Broadcast partitions of `other` Modin DataFrame and apply a function along full axis. + + Parameters + ---------- + axis : {0, 1} + Axis to apply over (0 - rows, 1 - columns). + func : callable + Function to apply. + other : PandasDataframe or list + Modin DataFrame(s) to broadcast. + new_index : list-like, optional + Index of the result. We may know this in advance, + and if not provided it must be computed. + new_columns : list-like, optional + Columns of the result. We may know this in + advance, and if not provided it must be computed. + apply_indices : list-like, default: None + Indices of `axis ^ 1` to apply function over. + enumerate_partitions : bool, default: False + Whether pass partition index into applied `func` or not. + Note that `func` must be able to obtain `partition_idx` kwarg. + dtypes : list-like, default: None + Data types of the result. This is an optimization + because there are functions that always result in a particular data + type, and allows us to avoid (re)computing it. + keep_partitioning : boolean, default: True + The flag to keep partition boundaries for Modin Frame. + Setting it to True disables shuffling data from one partition to another. + sync_labels : boolean, default: True + Synchronize external indexes (`new_index`, `new_columns`) with internal indexes. + This could be used when you're certain that the indices in partitions are equal to + the provided hints in order to save time on syncing them. + pass_axis_lengths_to_partitions : bool, default: False + Whether pass partition lengths along `axis ^ 1` to the kernel `func`. + Note that `func` must be able to obtain `df, *axis_lengths`. + + Returns + ------- + PandasDataframe + New Modin DataFrame. 
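+
+ Notes
+ -----
+ If ``dtypes`` is the string ``"copy"``, the dtypes of ``self`` are reused
+ for the result; if a single dtype is passed instead, every result column is
+ assumed to have that dtype.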
+ """ + if other is not None: + if not isinstance(other, list): + other = [other] + other = [o._partitions for o in other] if len(other) else None + + if apply_indices is not None: + numeric_indices = self.axes[axis ^ 1].get_indexer_for(apply_indices) + apply_indices = self._get_dict_of_block_index( + axis ^ 1, numeric_indices + ).keys() + + apply_func_args = None + if pass_axis_lengths_to_partitions: + if axis == 0: + apply_func_args = ( + self._column_widths_cache + if self._column_widths_cache is not None + else [part.width(materialize=False) for part in self._partitions[0]] + ) + else: + apply_func_args = ( + self._row_lengths_cache + if self._row_lengths_cache is not None + else [ + part.length(materialize=False) for part in self._partitions.T[0] + ] + ) + + new_partitions = self._partition_mgr_cls.broadcast_axis_partitions( + axis=axis, + left=self._partitions, + right=other, + apply_func=self._build_treereduce_func(axis, func), + apply_indices=apply_indices, + enumerate_partitions=enumerate_partitions, + keep_partitioning=keep_partitioning, + apply_func_args=apply_func_args, + ) + kw = {"row_lengths": None, "column_widths": None} + if dtypes == "copy": + kw["dtypes"] = self._dtypes + elif dtypes is not None: + if new_columns is None: + ( + new_columns, + kw["column_widths"], + ) = self._compute_axis_labels_and_lengths(1, new_partitions) + kw["dtypes"] = pandas.Series( + [np.dtype(dtypes)] * len(new_columns), index=new_columns + ) + + if not keep_partitioning: + if kw["row_lengths"] is None and new_index is not None: + if axis == 0: + kw["row_lengths"] = get_length_list( # noqa F821 + axis_len=len(new_index), num_splits=new_partitions.shape[0] + ) + elif ( + axis == 1 + and self._row_lengths_cache is not None + and len(new_index) == sum(self._row_lengths_cache) + ): + kw["row_lengths"] = self._row_lengths_cache + if kw["column_widths"] is None and new_columns is not None: + if axis == 1: + kw["column_widths"] = get_length_list( # noqa F821 + axis_len=len(new_columns), + num_splits=new_partitions.shape[1], + ) + elif ( + axis == 0 + and self._column_widths_cache is not None + and len(new_columns) == sum(self._column_widths_cache) + ): + kw["column_widths"] = self._column_widths_cache + result = self.__constructor__( + new_partitions, index=new_index, columns=new_columns, **kw + ) + if sync_labels and new_index is not None: + result.synchronize_labels(axis=0) + if sync_labels and new_columns is not None: + result.synchronize_labels(axis=1) + return result + + def _copartition(self, axis, other, how, sort, force_repartition=False): + """ + Copartition two Modin DataFrames. + + Perform aligning of partitions, index and partition blocks. + + Parameters + ---------- + axis : {0, 1} + Axis to copartition along (0 - rows, 1 - columns). + other : PandasDataframe + Other Modin DataFrame(s) to copartition against. + how : str + How to manage joining the index object ("left", "right", etc.). + sort : bool + Whether sort the joined index or not. + force_repartition : bool, default: False + Whether force the repartitioning or not. By default, + this method will skip repartitioning if it is possible. This is because + reindexing is extremely inefficient. Because this method is used to + `join` or `append`, it is vital that the internal indices match. 
+ + Returns + ------- + tuple + Tuple containing: + 1) 2-d NumPy array of aligned left partitions + 2) list of 2-d NumPy arrays of aligned right partitions + 3) joined index along ``axis`` + 4) List with sizes of partitions along axis that partitioning + was done on. This list will be empty if and only if all + the frames are empty. + """ + if isinstance(other, type(self)): + other = [other] + + self_index = self.axes[axis] + others_index = [o.axes[axis] for o in other] + joined_index, make_reindexer = self._join_index_objects( + axis, [self_index] + others_index, how, sort + ) + + frames = [self] + other + non_empty_frames_idx = [ + i for i, o in enumerate(frames) if o._partitions.size != 0 + ] + + # If all frames are empty + if len(non_empty_frames_idx) == 0: + return ( + self._partitions, + [o._partitions for o in other], + joined_index, + # There are no partition sizes because the resulting dataframe + # has no partitions. + [], + ) + + base_frame_idx = non_empty_frames_idx[0] + other_frames = frames[base_frame_idx + 1 :] + + # Picking first non-empty frame + base_frame = frames[non_empty_frames_idx[0]] + base_index = base_frame.axes[axis] + + # define conditions for reindexing and repartitioning `self` frame + do_reindex_base = not base_index.equals(joined_index) + do_repartition_base = force_repartition or do_reindex_base + + # Perform repartitioning and reindexing for `base_frame` if needed. + # Also define length of base and frames. We will need to know the + # lengths for alignment. + if do_repartition_base: + reindexed_base = base_frame._partition_mgr_cls.map_axis_partitions( + axis, + base_frame._partitions, + make_reindexer(do_reindex_base, base_frame_idx), + ) + if axis: + base_lengths = [obj.width() for obj in reindexed_base[0]] + else: + base_lengths = [obj.length() for obj in reindexed_base.T[0]] + else: + reindexed_base = base_frame._partitions + base_lengths = base_frame.column_widths if axis else base_frame.row_lengths + + others_lengths = [o._axes_lengths[axis] for o in other_frames] + + # define conditions for reindexing and repartitioning `other` frames + do_reindex_others = [ + not o.axes[axis].equals(joined_index) for o in other_frames + ] + + do_repartition_others = [None] * len(other_frames) + for i in range(len(other_frames)): + do_repartition_others[i] = ( + force_repartition + or do_reindex_others[i] + or others_lengths[i] != base_lengths + ) + + # perform repartitioning and reindexing for `other_frames` if needed + reindexed_other_list = [None] * len(other_frames) + for i in range(len(other_frames)): + if do_repartition_others[i]: + # indices of others frame start from `base_frame_idx` + 1 + reindexed_other_list[i] = other_frames[ + i + ]._partition_mgr_cls.map_axis_partitions( + axis, + other_frames[i]._partitions, + make_reindexer(do_repartition_others[i], base_frame_idx + 1 + i), + lengths=base_lengths, + ) + else: + reindexed_other_list[i] = other_frames[i]._partitions + reindexed_frames = ( + [frames[i]._partitions for i in range(base_frame_idx)] + + [reindexed_base] + + reindexed_other_list + ) + return (reindexed_frames[0], reindexed_frames[1:], joined_index, base_lengths) + + @lazy_metadata_decorator(apply_axis="both") + def n_ary_op( + self, + op, + right_frames: list, + join_type="outer", + copartition_along_columns=True, + dtypes=None, + ): + """ + Perform an n-opary operation by joining with other Modin DataFrame(s). + + Parameters + ---------- + op : callable + Function to apply after the join. 
+ right_frames : list of PandasDataframe + Modin DataFrames to join with. + join_type : str, default: "outer" + Type of join to apply. + copartition_along_columns : bool, default: True + Whether to perform copartitioning along columns or not. + For some ops this isn't needed (e.g., `fillna`). + dtypes : series, default: None + Dtypes of the resultant dataframe, this argument will be + received if the resultant dtypes of n-opary operation is precomputed. + + Returns + ------- + PandasDataframe + New Modin DataFrame. + """ + left_parts, list_of_right_parts, joined_index, row_lengths = self._copartition( + 0, right_frames, join_type, sort=True + ) + if copartition_along_columns: + new_left_frame = self.__constructor__( + left_parts, joined_index, self.columns, row_lengths, self.column_widths + ) + new_right_frames = [ + self.__constructor__( + right_parts, + joined_index, + right_frame.columns, + row_lengths, + right_frame.column_widths, + ) + for right_parts, right_frame in zip(list_of_right_parts, right_frames) + ] + + ( + left_parts, + list_of_right_parts, + joined_columns, + column_widths, + ) = new_left_frame._copartition( + 1, + new_right_frames, + join_type, + sort=True, + ) + else: + joined_columns = self._columns_cache + column_widths = self._column_widths_cache + + new_frame = ( + np.array([]) + if len(left_parts) == 0 + or any(len(right_parts) == 0 for right_parts in list_of_right_parts) + else self._partition_mgr_cls.n_ary_operation( + left_parts, op, list_of_right_parts + ) + ) + + return self.__constructor__( + new_frame, + joined_index, + joined_columns, + row_lengths, + column_widths, + dtypes, + ) + + @lazy_metadata_decorator(apply_axis="both") + def concat( + self, + axis: Union[int, Axis], + others: Union["PandasDataframe", list["PandasDataframe"]], + how, + sort, + ) -> "PandasDataframe": + """ + Concatenate `self` with one or more other Modin DataFrames. + + Parameters + ---------- + axis : int or modin.core.dataframe.base.utils.Axis + Axis to concatenate over. + others : list + List of Modin DataFrames to concatenate with. + how : str + Type of join to use for the axis. + sort : bool + Whether sort the result or not. + + Returns + ------- + PandasDataframe + New Modin DataFrame. 
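+
+ Notes
+ -----
+ A fast path skips copartitioning entirely when every frame already shares
+ the same column labels and partition widths (row-wise concat) or the same
+ index and partition row lengths (column-wise concat).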
+ """ + axis = Axis(axis) + new_widths = None + new_lengths = None + + def _compute_new_widths(): + widths = None + if self._column_widths_cache is not None and all( + o._column_widths_cache is not None for o in others + ): + widths = self._column_widths_cache + [ + width for o in others for width in o._column_widths_cache + ] + return widths + + # Fast path for equivalent columns and partitioning + if ( + axis == Axis.ROW_WISE + and all(o.columns.equals(self.columns) for o in others) + and all(o.column_widths == self.column_widths for o in others) + ): + joined_index = self.columns + left_parts = self._partitions + right_parts = [o._partitions for o in others] + new_widths = self._column_widths_cache + elif ( + axis == Axis.COL_WISE + and all(o.index.equals(self.index) for o in others) + and all(o.row_lengths == self.row_lengths for o in others) + ): + joined_index = self.index + left_parts = self._partitions + right_parts = [o._partitions for o in others] + new_lengths = self._row_lengths_cache + # we can only do this for COL_WISE because `concat` might rebalance partitions for ROW_WISE + new_widths = _compute_new_widths() + else: + ( + left_parts, + right_parts, + joined_index, + partition_sizes_along_axis, + ) = self._copartition( + axis.value ^ 1, others, how, sort, force_repartition=False + ) + if axis == Axis.COL_WISE: + new_lengths = partition_sizes_along_axis + new_widths = _compute_new_widths() + else: + new_widths = partition_sizes_along_axis + new_partitions, new_lengths2 = self._partition_mgr_cls.concat( + axis.value, left_parts, right_parts + ) + if new_lengths is None: + new_lengths = new_lengths2 + new_dtypes = None + if axis == Axis.ROW_WISE: + new_index = self.index.append([other.index for other in others]) + new_columns = joined_index + all_dtypes = [frame._dtypes for frame in [self] + others] + if all(dtypes is not None for dtypes in all_dtypes): + new_dtypes = pandas.concat(all_dtypes, axis=1) + # 'nan' value will be placed in a row if a column doesn't exist in all frames; + # this value is np.float64 type so we need an explicit conversion + new_dtypes.fillna(np.dtype("float64"), inplace=True) + new_dtypes = new_dtypes.apply( + lambda row: find_common_type(row.values), axis=1 + ) + # If we have already cached the length of each row in at least one + # of the row's partitions, we can build new_lengths for the new + # frame. Typically, if we know the length for any partition in a + # row, we know the length for the first partition in the row. So + # just check the lengths of the first column of partitions. + if not new_lengths: + new_lengths = [] + if new_partitions.size > 0: + for part in new_partitions.T[0]: + if part._length_cache is not None: + new_lengths.append(part.length()) + else: + new_lengths = None + break + else: + new_columns = self.columns.append([other.columns for other in others]) + new_index = joined_index + if self._dtypes is not None and all(o._dtypes is not None for o in others): + new_dtypes = pandas.concat([self.dtypes] + [o.dtypes for o in others]) + # If we have already cached the width of each column in at least one + # of the column's partitions, we can build new_widths for the new + # frame. Typically, if we know the width for any partition in a + # column, we know the width for the first partition in the column. + # So just check the widths of the first row of partitions. 
+ if not new_widths: + new_widths = [] + if new_partitions.size > 0: + for part in new_partitions[0]: + if part._width_cache is not None: + new_widths.append(part.width()) + else: + new_widths = None + break + return self.__constructor__( + new_partitions, new_index, new_columns, new_lengths, new_widths, new_dtypes + ) + + def groupby( + self, + axis: Union[int, Axis], + by: Union[str, list[str]], + operator: Callable, + result_schema: Optional[dict[Hashable, type]] = None, + ) -> "PandasDataframe": + """ + Generate groups based on values in the input column(s) and perform the specified operation on each. + + Parameters + ---------- + axis : int or modin.core.dataframe.base.utils.Axis + The axis to apply the grouping over. + by : string or list of strings + One or more column labels to use for grouping. + operator : callable + The operation to carry out on each of the groups. The operator is another + algebraic operator with its own user-defined function parameter, depending + on the output desired by the user. + result_schema : dict, optional + Mapping from column labels to data types that represents the types of the output dataframe. + + Returns + ------- + PandasDataframe + A new PandasDataframe containing the groupings specified, with the operator + applied to each group. + + Notes + ----- + No communication between groups is allowed in this algebra implementation. + + The number of rows (columns if axis=1) returned by the user-defined function + passed to the groupby may be at most the number of rows in the group, and + may be as small as a single row. + + Unlike the pandas API, an intermediate “GROUP BY” object is not present in this + algebra implementation. + """ + pass + + @lazy_metadata_decorator(apply_axis="opposite", axis_arg=0) + def groupby_reduce( + self, + axis, + by, + map_func, + reduce_func, + new_index=None, + new_columns=None, + apply_indices=None, + ): + """ + Groupby another Modin DataFrame dataframe and aggregate the result. + + Parameters + ---------- + axis : {0, 1} + Axis to groupby and aggregate over. + by : PandasDataframe or None + A Modin DataFrame to group by. + map_func : callable + Map component of the aggregation. + reduce_func : callable + Reduce component of the aggregation. + new_index : pandas.Index, optional + Index of the result. We may know this in advance, + and if not provided it must be computed. + new_columns : pandas.Index, optional + Columns of the result. We may know this in advance, + and if not provided it must be computed. + apply_indices : list-like, default: None + Indices of `axis ^ 1` to apply groupby over. + + Returns + ------- + PandasDataframe + New Modin DataFrame. + """ + by_parts = by if by is None else by._partitions + if by is None: + self._propagate_index_objs(axis=0) + + if apply_indices is not None: + numeric_indices = self.axes[axis ^ 1].get_indexer_for(apply_indices) + apply_indices = list( + self._get_dict_of_block_index(axis ^ 1, numeric_indices).keys() + ) + + new_partitions = self._partition_mgr_cls.groupby_reduce( + axis, self._partitions, by_parts, map_func, reduce_func, apply_indices + ) + kw = self.__make_init_labels_args(new_partitions, new_index, new_columns) + return self.__constructor__(new_partitions, **kw) + + @classmethod + def from_pandas(cls, df): + """ + Create a Modin DataFrame from a pandas DataFrame. + + Parameters + ---------- + df : pandas.DataFrame + A pandas DataFrame. + + Returns + ------- + PandasDataframe + New Modin DataFrame. 
+ """ + new_index = df.index + new_columns = df.columns + new_dtypes = df.dtypes + new_frame, new_lengths, new_widths = cls._partition_mgr_cls.from_pandas( + df, True + ) + return cls( + new_frame, + new_index, + new_columns, + new_lengths, + new_widths, + dtypes=new_dtypes, + ) + + @classmethod + def from_arrow(cls, at): + """ + Create a Modin DataFrame from an Arrow Table. + + Parameters + ---------- + at : pyarrow.table + Arrow Table. + + Returns + ------- + PandasDataframe + New Modin DataFrame. + """ + new_frame, new_lengths, new_widths = cls._partition_mgr_cls.from_arrow( + at, return_dims=True + ) + new_columns = Index.__new__(Index, data=at.column_names, dtype="O") + new_index = Index.__new__(RangeIndex, data=range(at.num_rows)) + new_dtypes = pandas.Series( + [cls._arrow_type_to_dtype(col.type) for col in at.columns], + index=at.column_names, + ) + return cls( + partitions=new_frame, + index=new_index, + columns=new_columns, + row_lengths=new_lengths, + column_widths=new_widths, + dtypes=new_dtypes, + ) + + @classmethod + def _arrow_type_to_dtype(cls, arrow_type): + """ + Convert an arrow data type to a pandas data type. + + Parameters + ---------- + arrow_type : arrow dtype + Arrow data type to be converted to a pandas data type. + + Returns + ------- + object + Any dtype compatible with pandas. + """ + import pyarrow + + try: + res = arrow_type.to_pandas_dtype() + # Conversion to pandas is not implemented for some arrow types, + # perform manual conversion for them: + except NotImplementedError: + if pyarrow.types.is_time(arrow_type): + res = np.dtype(datetime.time) + else: + raise + + if not isinstance(res, (np.dtype, str)): + return np.dtype(res) + return res + + @lazy_metadata_decorator(apply_axis="both") + def to_pandas(self): + """ + Convert this Modin DataFrame to a pandas DataFrame. + + Returns + ------- + pandas.DataFrame + """ + df = self._partition_mgr_cls.to_pandas(self._partitions) + if df.empty: + df = pandas.DataFrame(columns=self.columns, index=self.index) + else: + for axis, external_index in enumerate( + [self._index_cache, self._columns_cache] + ): + # no need to check external and internal axes since in that case + # external axes will be computed from internal partitions + if external_index is not None: + ErrorMessage.internal_error( + not df.axes[axis].equals(external_index), + f"Internal and external indices on axis {axis} do not match.", + ) + # have to do this in order to assign some potentially missing metadata, + # the ones that were set to the external index but were never propagated + # into the internal ones + df = df.set_axis(axis=axis, labels=external_index, copy=False) + + return df + + def to_numpy(self, **kwargs): + """ + Convert this Modin DataFrame to a NumPy array. + + Parameters + ---------- + **kwargs : dict + Additional keyword arguments to be passed in `to_numpy`. + + Returns + ------- + np.ndarray + """ + return self._partition_mgr_cls.to_numpy(self._partitions, **kwargs) + + @lazy_metadata_decorator(apply_axis=None, transpose=True) + def transpose(self): + """ + Transpose the index and columns of this Modin DataFrame. + + Reflect this Modin DataFrame over its main diagonal + by writing rows as columns and vice-versa. + + Returns + ------- + PandasDataframe + New Modin DataFrame. 
+ """ + new_partitions = self._partition_mgr_cls.lazy_map_partitions( + self._partitions, lambda df: df.T + ).T + if self._dtypes is not None: + new_dtypes = pandas.Series( + np.full(len(self.index), find_common_type(self.dtypes.values)), + index=self.index, + ) + else: + new_dtypes = None + return self.__constructor__( + new_partitions, + self._columns_cache, + self._index_cache, + self._column_widths_cache, + self._row_lengths_cache, + dtypes=new_dtypes, + ) + + def finalize(self): + """ + Perform all deferred calls on partitions. + + This makes `self` Modin Dataframe independent of a history of queries + that were used to build it. + """ + self._partition_mgr_cls.finalize(self._partitions) + + def __dataframe__(self, nan_as_null: bool = False, allow_copy: bool = True): + """ + Get a Modin DataFrame that implements the dataframe exchange protocol. + + See more about the protocol in https://data-apis.org/dataframe-protocol/latest/index.html. + + Parameters + ---------- + nan_as_null : bool, default: False + A keyword intended for the consumer to tell the producer + to overwrite null values in the data with ``NaN`` (or ``NaT``). + This currently has no effect; once support for nullable extension + dtypes is added, this value should be propagated to columns. + allow_copy : bool, default: True + A keyword that defines whether or not the library is allowed + to make a copy of the data. For example, copying data would be necessary + if a library supports strided buffers, given that this protocol + specifies contiguous buffers. Currently, if the flag is set to ``False`` + and a copy is needed, a ``RuntimeError`` will be raised. + + Returns + ------- + ProtocolDataframe + A dataframe object following the dataframe protocol specification. + """ + from modin.core.dataframe.pandas.interchange.dataframe_protocol.dataframe import ( + PandasProtocolDataframe, + ) + + return PandasProtocolDataframe( + self, nan_as_null=nan_as_null, allow_copy=allow_copy + ) + + @classmethod + def from_dataframe(cls, df: "ProtocolDataframe") -> "PandasDataframe": # noqa F821 + """ + Convert a DataFrame implementing the dataframe exchange protocol to a Core Modin Dataframe. + + See more about the protocol in https://data-apis.org/dataframe-protocol/latest/index.html. + + Parameters + ---------- + df : ProtocolDataframe + The DataFrame object supporting the dataframe exchange protocol. + + Returns + ------- + PandasDataframe + A new Core Modin Dataframe object. + """ + if type(df) == cls: + return df + + if not hasattr(df, "__dataframe__"): + raise ValueError( + "`df` does not support DataFrame exchange protocol, i.e. `__dataframe__` method" + ) + + from modin.core.dataframe.pandas.interchange.dataframe_protocol.from_dataframe import ( + from_dataframe_to_pandas, + ) + + pandas_df = from_dataframe_to_pandas(df) + return cls.from_pandas(pandas_df) diff --git a/src/snowflake/snowpark/modin/pandas/__init__.py b/src/snowflake/snowpark/modin/pandas/__init__.py new file mode 100644 index 00000000000..d2650e84640 --- /dev/null +++ b/src/snowflake/snowpark/modin/pandas/__init__.py @@ -0,0 +1,348 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. 
The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +# Code in this file may constitute partial or total reimplementation, or modification of +# existing code originally distributed by the Modin project, under the Apache License, +# Version 2.0. + +import sys +import warnings +from typing import Any + +import pandas + +__pandas_version__ = "2.2.1" + + +if sys.version_info.major == 3 and sys.version_info.minor == 8: + raise RuntimeError( + "Snowpark pandas does not support Python 3.8. Please update to Python 3.9 or later, and" + + f" update your pandas version to {__pandas_version__}." + ) # pragma: no cover + +if pandas.__version__ != __pandas_version__: + raise RuntimeError( + f"The pandas version installed ({pandas.__version__}) does not match the supported pandas version in" + + f" Snowpark pandas ({__pandas_version__}). Please update with `pip install pandas=={__pandas_version__}`." + ) # pragma: no cover + +with warnings.catch_warnings(): + warnings.simplefilter("ignore") + from pandas import describe_option # noqa: F401 + from pandas import get_option # noqa: F401 + from pandas import option_context # noqa: F401 + from pandas import reset_option # noqa: F401 + from pandas import ( # noqa: F401 + NA, + ArrowDtype, + BooleanDtype, + Categorical, + CategoricalDtype, + CategoricalIndex, + DateOffset, + DatetimeIndex, + DatetimeTZDtype, + ExcelWriter, + Flags, + Float32Dtype, + Float64Dtype, + Grouper, + Index, + IndexSlice, + Int8Dtype, + Int16Dtype, + Int32Dtype, + Int64Dtype, + Interval, + IntervalDtype, + IntervalIndex, + MultiIndex, + NamedAgg, + NaT, + Period, + PeriodDtype, + PeriodIndex, + RangeIndex, + SparseDtype, + StringDtype, + Timedelta, + TimedeltaIndex, + Timestamp, + UInt8Dtype, + UInt16Dtype, + UInt32Dtype, + UInt64Dtype, + api, + array, + bdate_range, + eval, + factorize, + from_dummies, + infer_freq, + interval_range, + offsets, + options, + period_range, + set_eng_float_format, + set_option, + test, + timedelta_range, + ) + +# TODO: SNOW-851745 make sure add all Snowpark pandas API general functions +from snowflake.snowpark.modin.pandas.dataframe import DataFrame +from snowflake.snowpark.modin.pandas.general import ( + concat, + crosstab, + cut, + date_range, + get_dummies, + isna, + isnull, + lreshape, + melt, + merge, + merge_asof, + merge_ordered, + notna, + notnull, + pivot, + pivot_table, + qcut, + to_datetime, + to_numeric, + to_timedelta, + unique, + value_counts, + wide_to_long, +) +from snowflake.snowpark.modin.pandas.io import ( + ExcelFile, + HDFStore, + json_normalize, + read_clipboard, + read_csv, + read_excel, + read_feather, + read_fwf, + read_gbq, + read_hdf, + read_html, + read_json, + read_orc, + read_parquet, + read_pickle, + read_sas, + read_spss, + read_sql, + read_sql_query, + read_sql_table, + read_stata, + read_table, + read_xml, + to_pickle, +) +from snowflake.snowpark.modin.pandas.plotting import Plotting as plotting +from snowflake.snowpark.modin.pandas.series import Series +from 
snowflake.snowpark.modin.plugin._internal.session import SnowpandasSessionHolder + +# The extensions assigned to this module +_PD_EXTENSIONS_: dict = {} + +# base needs to be re-exported in order to properly override docstrings for BasePandasDataset +# moving this import higher prevents sphinx from building documentation (??) +from snowflake.snowpark.modin.pandas import base # isort: skip # noqa: E402,F401 + +import snowflake.snowpark.modin.plugin.extensions.pd_extensions as pd_extensions # isort: skip # noqa: E402,F401 +import snowflake.snowpark.modin.plugin.extensions.pd_overrides # isort: skip # noqa: E402,F401 +import snowflake.snowpark.modin.plugin.extensions.dataframe_extensions # isort: skip # noqa: E402,F401 +import snowflake.snowpark.modin.plugin.extensions.dataframe_overrides # isort: skip # noqa: E402,F401 +import snowflake.snowpark.modin.plugin.extensions.series_extensions # isort: skip # noqa: E402,F401 +import snowflake.snowpark.modin.plugin.extensions.series_overrides # isort: skip # noqa: E402,F401 + + +def __getattr__(name: str) -> Any: + """ + Overrides getattr on the module to enable extensions. + Parameters + ---------- + name : str + The name of the attribute being retrieved. + Returns + ------- + Attribute + Returns the extension attribute, if it exists, otherwise returns the attribute + imported in this file. + """ + try: + return _PD_EXTENSIONS_.get(name, globals()[name]) + except KeyError: + raise AttributeError( + f"module 'snowflake.snowpark.modin.pandas' has no attribute '{name}'" + ) + + +__all__ = [ # noqa: F405 + "DataFrame", + "Series", + "read_csv", + "read_parquet", + "read_json", + "read_html", + "read_clipboard", + "read_excel", + "read_hdf", + "read_feather", + "read_stata", + "read_sas", + "read_pickle", + "read_sql", + "read_gbq", + "read_table", + "read_spss", + "read_orc", + "json_normalize", + "concat", + "eval", + "cut", + "factorize", + "test", + "qcut", + "to_datetime", + "get_dummies", + "isna", + "isnull", + "merge", + "pivot_table", + "date_range", + "Index", + "MultiIndex", + "Series", + "bdate_range", + "period_range", + "DatetimeIndex", + "to_timedelta", + "set_eng_float_format", + "options", + "set_option", + "CategoricalIndex", + "Timedelta", + "Timestamp", + "NaT", + "PeriodIndex", + "Categorical", + "__version__", + "melt", + "crosstab", + "plotting", + "Interval", + "UInt8Dtype", + "UInt16Dtype", + "UInt32Dtype", + "UInt64Dtype", + "SparseDtype", + "Int8Dtype", + "Int16Dtype", + "Int32Dtype", + "Int64Dtype", + "CategoricalDtype", + "DatetimeTZDtype", + "IntervalDtype", + "PeriodDtype", + "BooleanDtype", + "StringDtype", + "NA", + "RangeIndex", + "TimedeltaIndex", + "IntervalIndex", + "IndexSlice", + "Grouper", + "array", + "Period", + "show_versions", + "DateOffset", + "timedelta_range", + "infer_freq", + "interval_range", + "ExcelWriter", + "read_fwf", + "read_sql_table", + "read_sql_query", + "ExcelFile", + "to_pickle", + "HDFStore", + "lreshape", + "wide_to_long", + "merge_asof", + "merge_ordered", + "notnull", + "notna", + "pivot", + "to_numeric", + "unique", + "value_counts", + "NamedAgg", + "api", + "read_xml", + "ArrowDtype", + "Flags", + "Float32Dtype", + "Float64Dtype", + "from_dummies", +] + +del pandas + +# Make SnowpandasSessionHolder this module's and modin.pandas's __class__ so that we can make +# "session" a lazy property of the modules. 
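+# For illustration only: after this override, an access such as
+#   import snowflake.snowpark.modin.pandas as pd
+#   pd.session
+# resolves the active Snowpark session lazily on first access instead of at import time.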
+# This implementation follows Python's suggestion here: +# https://docs.python.org/3.12/reference/datamodel.html#customizing-module-attribute-access +sys.modules[__name__].__class__ = SnowpandasSessionHolder +# When docs are generated, modin.pandas is not imported, so do not perform this overwrite +if "modin.pandas" in sys.modules: + sys.modules["modin.pandas"].__class__ = SnowpandasSessionHolder + +_SKIP_TOP_LEVEL_ATTRS = [ + # __version__ and show_versions are exported by __all__, but not currently defined in Snowpark pandas. + "__version__", + "show_versions", + # SNOW-1316523: Snowpark pandas should re-export the native pandas.api submodule, but doing so + # would override register_pd_accessor and similar methods defined in our own modin.pandas.extensions + # module. + "api", +] + +# Manually re-export the members of the pd_extensions namespace, which are not declared in __all__. +_EXTENSION_ATTRS = ["read_snowflake", "to_snowflake", "to_snowpark", "to_pandas"] +# We also need to re-export native_pd.offsets, since modin.pandas doesn't re-export it. +# snowflake.snowpark.pandas.base also needs to be re-exported to make docstring overrides for BasePandasDataset work. +_ADDITIONAL_ATTRS = ["offsets", "base"] + +# This code should eventually be moved into the `snowflake.snowpark.modin.plugin` module instead. +# Currently trying to do so would result in incorrect results because `snowflake.snowpark.modin.pandas` +# import submodules of `snowflake.snowpark.modin.plugin`, so we would encounter errors due to +# partially initialized modules. +import modin.pandas.api.extensions as _ext # noqa: E402 + +# This loop overrides all methods in the `modin.pandas` namespace so users can obtain Snowpark pandas objects from it. +for name in __all__ + _ADDITIONAL_ATTRS: + if name not in _SKIP_TOP_LEVEL_ATTRS: + # instead of using this as a decorator, we can call the function directly + _ext.register_pd_accessor(name)(__getattr__(name)) + +for name in _EXTENSION_ATTRS: + _ext.register_pd_accessor(name)(getattr(pd_extensions, name)) diff --git a/src/snowflake/snowpark/modin/pandas/accessor.py b/src/snowflake/snowpark/modin/pandas/accessor.py new file mode 100644 index 00000000000..ea9946e4209 --- /dev/null +++ b/src/snowflake/snowpark/modin/pandas/accessor.py @@ -0,0 +1,201 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +# Code in this file may constitute partial or total reimplementation, or modification of +# existing code originally distributed by the Modin project, under the Apache License, +# Version 2.0. + +""" +Implement various accessor classes for DataFrame and Series API. + +SparseFrameAccessor implements API of pandas.DataFrame.sparse accessor. 
+ +SparseAccessor implements API of pandas.Series.sparse accessor. + +CachedAccessor implements API of pandas.core.accessor.CachedAccessor +""" + +import pandas +from pandas.core.dtypes.dtypes import SparseDtype + +import snowflake.snowpark.modin.pandas as pd +from snowflake.snowpark.modin.plugin.utils.error_message import ErrorMessage +from snowflake.snowpark.modin.utils import _inherit_docstrings + + +class BaseSparseAccessor: + """ + Base class for various sparse DataFrame accessor classes. + + Parameters + ---------- + data : DataFrame or Series + Object to operate on. + """ + + _validation_msg = "Can only use the '.sparse' accessor with Sparse data." + + def __init__(self, data=None) -> None: + self._parent = data + self._validate(data) + + @classmethod + def _validate(cls, data): + """ + Verify that `data` dtypes are compatible with `pandas.core.arrays.sparse.dtype.SparseDtype`. + + Parameters + ---------- + data : DataFrame + Object to check. + + Raises + ------ + NotImplementedError + Function is implemented in child classes. + """ + ErrorMessage.not_implemented("Implemented by subclasses") # pragma: no cover + + def _default_to_pandas(self, op, *args, **kwargs): + """ + Convert dataset to pandas type and call a pandas sparse.`op` on it. + + Parameters + ---------- + op : str + Name of pandas function. + *args : list + Additional positional arguments to be passed in `op`. + **kwargs : dict + Additional keywords arguments to be passed in `op`. + + Returns + ------- + object + Result of operation. + """ + return self._parent._default_to_pandas( + lambda parent: op(parent.sparse, *args, **kwargs) + ) + + +# Snowpark pandas does not support sparse accessors - remove docstrings to prevent doctests from running +# @_inherit_docstrings(pandas.core.arrays.sparse.accessor.SparseFrameAccessor) +class SparseFrameAccessor(BaseSparseAccessor): + @classmethod + def _validate(cls, data): + """ + Verify that `data` dtypes are compatible with `pandas.core.arrays.sparse.dtype.SparseDtype`. + + Parameters + ---------- + data : DataFrame + Object to check. + + Raises + ------ + AttributeError + If check fails. + """ + dtypes = data.dtypes + if not all(isinstance(t, SparseDtype) for t in dtypes): + raise AttributeError(cls._validation_msg) + + @property + def density(self): + return self._parent._default_to_pandas(pandas.DataFrame.sparse).density + + @classmethod + def from_spmatrix(cls, data, index=None, columns=None): + return pd.DataFrame( + pandas.DataFrame.sparse.from_spmatrix(data, index=index, columns=columns) + ) + + def to_dense(self): + return self._default_to_pandas(pandas.DataFrame.sparse.to_dense) + + def to_coo(self): + return self._default_to_pandas(pandas.DataFrame.sparse.to_coo) + + +# Snowpark pandas does not support sparse accessors - remove docstrings to prevent doctests from running +# @_inherit_docstrings(pandas.core.arrays.sparse.accessor.SparseAccessor) +class SparseAccessor(BaseSparseAccessor): + @classmethod + def _validate(cls, data): + """ + Verify that `data` dtype is compatible with `pandas.core.arrays.sparse.dtype.SparseDtype`. + + Parameters + ---------- + data : Series + Object to check. + + Raises + ------ + AttributeError + If check fails. 
+ """ + if not isinstance(data.dtype, SparseDtype): + raise AttributeError(cls._validation_msg) + + @property + def density(self): + return self._parent._default_to_pandas(pandas.Series.sparse).density + + @property + def fill_value(self): + return self._parent._default_to_pandas(pandas.Series.sparse).fill_value + + @property + def npoints(self): + return self._parent._default_to_pandas(pandas.Series.sparse).npoints + + @property + def sp_values(self): + return self._parent._default_to_pandas(pandas.Series.sparse).sp_values + + @classmethod + def from_coo(cls, A, dense_index=False): + return cls._default_to_pandas( + pandas.Series.sparse.from_coo, A, dense_index=dense_index + ) + + def to_coo(self, row_levels=(0,), column_levels=(1,), sort_labels=False): + return self._default_to_pandas( + pandas.Series.sparse.to_coo, + row_levels=row_levels, + column_levels=column_levels, + sort_labels=sort_labels, + ) + + def to_dense(self): + return self._default_to_pandas(pandas.Series.sparse.to_dense) + + +@_inherit_docstrings(pandas.core.accessor.CachedAccessor) +class CachedAccessor: + def __init__(self, name: str, accessor) -> None: + self._name = name + self._accessor = accessor + + def __get__(self, obj, cls): + if obj is None: + return self._accessor + accessor_obj = self._accessor(obj) + object.__setattr__(obj, self._name, accessor_obj) + return accessor_obj diff --git a/src/snowflake/snowpark/modin/pandas/api/__init__.py b/src/snowflake/snowpark/modin/pandas/api/__init__.py new file mode 100644 index 00000000000..76858c5d9b1 --- /dev/null +++ b/src/snowflake/snowpark/modin/pandas/api/__init__.py @@ -0,0 +1,24 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +# Code in this file may constitute partial or total reimplementation, or modification of +# existing code originally distributed by the Modin project, under the Apache License, +# Version 2.0. + +from snowflake.snowpark.modin.pandas.api import extensions + +__all__ = ["extensions"] diff --git a/src/snowflake/snowpark/modin/pandas/api/extensions/__init__.py b/src/snowflake/snowpark/modin/pandas/api/extensions/__init__.py new file mode 100644 index 00000000000..d91de294dae --- /dev/null +++ b/src/snowflake/snowpark/modin/pandas/api/extensions/__init__.py @@ -0,0 +1,32 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +# Code in this file may constitute partial or total reimplementation, or modification of +# existing code originally distributed by the Modin project, under the Apache License, +# Version 2.0. + +from .extensions import ( + register_dataframe_accessor, + register_pd_accessor, + register_series_accessor, +) + +__all__ = [ + "register_dataframe_accessor", + "register_series_accessor", + "register_pd_accessor", +] diff --git a/src/snowflake/snowpark/modin/pandas/api/extensions/extensions.py b/src/snowflake/snowpark/modin/pandas/api/extensions/extensions.py new file mode 100644 index 00000000000..0520e6a44f1 --- /dev/null +++ b/src/snowflake/snowpark/modin/pandas/api/extensions/extensions.py @@ -0,0 +1,186 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +# Code in this file may constitute partial or total reimplementation, or modification of +# existing code originally distributed by the Modin project, under the Apache License, +# Version 2.0. + +""" +File containing decorators that allow registering extension APIs on Modin's API layer objects. +In Modin OSS, this file is placed under modin/pandas/api/extensions/extensions.py. However, +since our repository does not use Modin as an external dependency, following this path construction +would cause circular imports. + +plugin/{pd,dataframe,series}_extensions.py must import this module (modin/pandas/extensions.py) +in order to use the decorators defined here. + +Note that telemetry and other decorators must always be placed below the extension decorator: +the extension decorator calls setattr on the relevant object/module, and therefore must be called +last for other decorators to be applied. Furthermore, all DataFrame/Series methods declared in +extensions must have a telemetry decorator, unlike those defined directly on the class, which +have telemetry automatically added by the TelemetryMeta metaclass. +""" + +from types import ModuleType +from typing import TYPE_CHECKING, Any, Union + +if TYPE_CHECKING: + import snowflake.snowpark.modin.pandas as pd + + +def _set_attribute_on_obj( + name: str, + extensions_dict: dict, + obj: Union["pd.DataFrame", "pd.Series", ModuleType], +): + """ + Create a new or override existing attribute on obj. + + Parameters + ---------- + name : str + The name of the attribute to assign to `obj`. 
+ extensions_dict : dict + The dictionary mapping extension name to `new_attr` (assigned below). + obj : DataFrame, Series, or modin.pandas + The object we are assigning the new attribute to. + + Returns + ------- + decorator + Returns the decorator function. + """ + + def decorator(new_attr: Any): + """ + The decorator for a function or class to be assigned to name + + Parameters + ---------- + new_attr : Any + The new attribute to assign to name. + + Returns + ------- + new_attr + Unmodified new_attr is return from the decorator. + """ + extensions_dict[name] = new_attr + setattr(obj, name, new_attr) + return new_attr + + return decorator + + +def register_dataframe_accessor(name: str): + """ + Registers a dataframe attribute with the name provided. + This is a decorator that assigns a new attribute to DataFrame. It can be used + with the following syntax: + ``` + @register_dataframe_accessor("new_method") + def my_new_dataframe_method(*args, **kwargs): + # logic goes here + return + ``` + The new attribute can then be accessed with the name provided: + ``` + df.new_method(*my_args, **my_kwargs) + ``` + Parameters + ---------- + name : str + The name of the attribute to assign to DataFrame. + Returns + ------- + decorator + Returns the decorator function. + """ + import snowflake.snowpark.modin.pandas as pd + + return _set_attribute_on_obj( + name, + pd.dataframe._DATAFRAME_EXTENSIONS_, + pd.dataframe.DataFrame, + ) + + +def register_series_accessor(name: str): + """ + Registers a series attribute with the name provided. + This is a decorator that assigns a new attribute to Series. It can be used + with the following syntax: + ``` + @register_series_accessor("new_method") + def my_new_series_method(*args, **kwargs): + # logic goes here + return + ``` + The new attribute can then be accessed with the name provided: + ``` + s.new_method(*my_args, **my_kwargs) + ``` + Parameters + ---------- + name : str + The name of the attribute to assign to Series. + Returns + ------- + decorator + Returns the decorator function. + """ + import snowflake.snowpark.modin.pandas as pd + + return _set_attribute_on_obj(name, pd.series._SERIES_EXTENSIONS_, pd.series.Series) + + +def register_pd_accessor(name: str): + """ + Registers a pd namespace attribute with the name provided. + + This is a decorator that assigns a new attribute to modin.pandas. It can be used + with the following syntax: + + ``` + @register_pd_accessor("new_function") + def my_new_pd_function(*args, **kwargs): + # logic goes here + return + ``` + + The new attribute can then be accessed with the name provided: + + ``` + import modin.pandas as pd + + pd.new_method(*my_args, **my_kwargs) + ``` + + + Parameters + ---------- + name : str + The name of the attribute to assign to modin.pandas. + + Returns + ------- + decorator + Returns the decorator function. + """ + import snowflake.snowpark.modin.pandas as pd + + return _set_attribute_on_obj(name, pd._PD_EXTENSIONS_, pd) diff --git a/src/snowflake/snowpark/modin/pandas/base.py b/src/snowflake/snowpark/modin/pandas/base.py new file mode 100644 index 00000000000..c06ebeae1b7 --- /dev/null +++ b/src/snowflake/snowpark/modin/pandas/base.py @@ -0,0 +1,4184 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. 
The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +# Code in this file may constitute partial or total reimplementation, or modification of +# existing code originally distributed by the Modin project, under the Apache License, +# Version 2.0. + +"""Implement DataFrame/Series public API as pandas does.""" +from __future__ import annotations + +import pickle as pkl +import re +import warnings +from collections.abc import Hashable, Mapping, Sequence +from typing import Any, Callable, Literal, get_args + +import numpy as np +import numpy.typing as npt +import pandas +import pandas.core.generic +import pandas.core.resample +import pandas.core.window.rolling +from pandas._libs import lib +from pandas._libs.lib import NoDefault, is_bool, no_default +from pandas._typing import ( + AggFuncType, + AnyArrayLike, + Axes, + Axis, + CompressionOptions, + DtypeBackend, + FillnaOptions, + IgnoreRaise, + IndexKeyFunc, + IndexLabel, + Level, + NaPosition, + RandomState, + Scalar, + StorageOptions, + TimedeltaConvertibleTypes, + TimestampConvertibleTypes, +) +from pandas.compat import numpy as numpy_compat +from pandas.core.common import apply_if_callable, count_not_none, pipe +from pandas.core.dtypes.common import ( + is_dict_like, + is_dtype_equal, + is_list_like, + is_numeric_dtype, + is_object_dtype, + pandas_dtype, +) +from pandas.core.dtypes.inference import is_integer +from pandas.core.indexes.api import ensure_index +from pandas.util._validators import ( + validate_ascending, + validate_bool_kwarg, + validate_percentile, +) + +from snowflake.snowpark.modin import pandas as pd +from snowflake.snowpark.modin.pandas.utils import ( + _doc_binary_op, + get_as_shape_compatible_dataframe_or_series, + is_scalar, + raise_if_native_pandas_objects, + validate_and_try_convert_agg_func_arg_func_to_str, +) +from snowflake.snowpark.modin.plugin._internal.telemetry import TelemetryMeta +from snowflake.snowpark.modin.plugin._typing import ListLike +from snowflake.snowpark.modin.plugin.utils.error_message import ErrorMessage +from snowflake.snowpark.modin.plugin.utils.warning_message import WarningMessage +from snowflake.snowpark.modin.utils import ( + _inherit_docstrings, + try_cast_to_pandas, + validate_int_kwarg, +) + +# Similar to pandas, sentinel value to use as kwarg in place of None when None has +# special meaning and needs to be distinguished from a user explicitly passing None. 
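+# For example (illustrative only), a keyword can default to this sentinel so the
+# implementation can tell "argument omitted" apart from an explicit None:
+#     def some_method(self, arg=sentinel):
+#         if arg is not sentinel:
+#             ...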
+sentinel = object() + +# Do not look up certain attributes in columns or index, as they're used for some +# special purposes, like serving remote context +_ATTRS_NO_LOOKUP = { + "____id_pack__", + "__name__", + "_cache", + "_ipython_canary_method_should_not_exist_", + "_ipython_display_", + "_repr_html_", + "_repr_javascript_", + "_repr_jpeg_", + "_repr_json_", + "_repr_latex_", + "_repr_markdown_", + "_repr_mimebundle_", + "_repr_pdf_", + "_repr_png_", + "_repr_svg_", + "__array_struct__", + "__array_interface__", + "_typ", +} + +_DEFAULT_BEHAVIOUR = { + "__init__", + "__class__", + "_get_index", + "_set_index", + "_pandas_class", + "_get_axis_number", + "empty", + "index", + "columns", + "name", + "dtypes", + "dtype", + "groupby", + "_get_name", + "_set_name", + "_default_to_pandas", + "_query_compiler", + "_to_pandas", + "_repartition", + "_build_repr_df", + "_reduce_dimension", + "__repr__", + "__len__", + "__constructor__", + "_create_or_update_from_compiler", + "_update_inplace", + # for persistance support; + # see DataFrame methods docstrings for more + "_inflate_light", + "_inflate_full", + "__reduce__", + "__reduce_ex__", + "_init", +} | _ATTRS_NO_LOOKUP + + +_doc_binary_op_kwargs = {"returns": "BasePandasDataset", "left": "BasePandasDataset"} + + +@_inherit_docstrings( + pandas.DataFrame, + apilink=["pandas.DataFrame", "pandas.Series"], + excluded=[ + pandas.DataFrame.between_time, + pandas.Series.between_time, + pandas.DataFrame.flags, + pandas.Series.flags, + pandas.DataFrame.kurt, + pandas.Series.kurt, + pandas.DataFrame.kurtosis, + pandas.Series.kurtosis, + pandas.DataFrame.rank, + pandas.Series.rank, + pandas.DataFrame.to_csv, + pandas.Series.to_csv, + pandas.DataFrame.sum, + ], +) +class BasePandasDataset(metaclass=TelemetryMeta): + """ + Implement most of the common code that exists in DataFrame/Series. + + Since both objects share the same underlying representation, and the algorithms + are the same, we use this object to define the general behavior of those objects + and then use those objects to define the output type. + + TelemetryMeta is a metaclass that automatically add telemetry decorators to classes/instance methods. + See TelemetryMeta for details. Note: Its subclasses will inherit this metaclass. + """ + + # pandas class that we pretend to be; usually it has the same name as our class + # but lives in "pandas" namespace. + _pandas_class = pandas.core.generic.NDFrame + + @pandas.util.cache_readonly + def _is_dataframe(self) -> bool: + """ + Tell whether this is a dataframe. + + Ideally, other methods of BasePandasDataset shouldn't care whether this + is a dataframe or a series, but sometimes we need to know. This method + is better than hasattr(self, "columns"), which for series will call + self.__getattr__("columns"), which requires materializing the index. + + Returns + ------- + bool : Whether this is a dataframe. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return issubclass(self._pandas_class, pandas.DataFrame) + + def _add_sibling(self, sibling): + """ + Add a DataFrame or Series object to the list of siblings. + + Siblings are objects that share the same query compiler. This function is called + when a shallow copy is made. + + Parameters + ---------- + sibling : BasePandasDataset + Dataset to add to siblings list. 
+ """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + sibling._siblings = self._siblings + [self] + self._siblings += [sibling] + for sib in self._siblings: + sib._siblings += [sibling] + + def _update_inplace(self, new_query_compiler): + """ + Update the current DataFrame inplace. + + Parameters + ---------- + new_query_compiler : query_compiler + The new QueryCompiler to use to manage the data. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + old_query_compiler = self._query_compiler + self._query_compiler = new_query_compiler + for sib in self._siblings: + sib._query_compiler = new_query_compiler + old_query_compiler.free() + + def _validate_other( + self, + other, + axis, + dtype_check=False, + compare_index=False, + ): + """ + Help to check validity of other in inter-df operations. + + Parameters + ---------- + other : modin.pandas.BasePandasDataset + Another dataset to validate against `self`. + axis : {None, 0, 1} + Specifies axis along which to do validation. When `1` or `None` + is specified, validation is done along `index`, if `0` is specified + validation is done along `columns` of `other` frame. + dtype_check : bool, default: False + Validates that both frames have compatible dtypes. + compare_index : bool, default: False + Compare Index if True. + + Returns + ------- + modin.pandas.BasePandasDataset + Other frame if it is determined to be valid. + + Raises + ------ + ValueError + If `other` is `Series` and its length is different from + length of `self` `axis`. + TypeError + If any validation checks fail. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + if isinstance(other, BasePandasDataset): + return other._query_compiler + if not is_list_like(other): + # We skip dtype checking if the other is a scalar. Note that pandas + # is_scalar can be misleading as it is False for almost all objects, + # even when those objects should be treated as scalars. See e.g. + # https://github.com/modin-project/modin/issues/5236. Therefore, we + # detect scalars by checking that `other` is neither a list-like nor + # another BasePandasDataset. + return other + axis = self._get_axis_number(axis) if axis is not None else 1 + result = other + if axis == 0: + if len(other) != len(self._query_compiler.index): + raise ValueError( + f"Unable to coerce to Series, length must be {len(self._query_compiler.index)}: " + + f"given {len(other)}" + ) + else: + if len(other) != len(self._query_compiler.columns): + raise ValueError( + f"Unable to coerce to Series, length must be {len(self._query_compiler.columns)}: " + + f"given {len(other)}" + ) + if hasattr(other, "dtype"): + other_dtypes = [other.dtype] * len(other) + elif is_dict_like(other): + other_dtypes = [ + type(other[label]) + for label in self._query_compiler.get_axis(axis) + # The binary operation is applied for intersection of axis labels + # and dictionary keys. So filtering out extra keys. + if label in other + ] + else: + other_dtypes = [type(x) for x in other] + if compare_index: + if not self.index.equals(other.index): + raise TypeError("Cannot perform operation with non-equal index") + # Do dtype checking. + if dtype_check: + self_dtypes = self._get_dtypes() + if is_dict_like(other): + # The binary operation is applied for the intersection of axis labels + # and dictionary keys. So filtering `self_dtypes` to match the `other` + # dictionary. 
+ self_dtypes = [ + dtype + for label, dtype in zip( + self._query_compiler.get_axis(axis), self._get_dtypes() + ) + if label in other + ] + + # TODO(https://github.com/modin-project/modin/issues/5239): + # this spuriously rejects other that is a list including some + # custom type that can be added to self's elements. + if not all( + (is_numeric_dtype(self_dtype) and is_numeric_dtype(other_dtype)) + or (is_object_dtype(self_dtype) and is_object_dtype(other_dtype)) + # Check if dtype is timedelta ("m") or datetime ("M") + or ( + lib.is_np_dtype(self_dtype, "mM") + and lib.is_np_dtype(other_dtype, "mM") + ) + or is_dtype_equal(self_dtype, other_dtype) + for self_dtype, other_dtype in zip(self_dtypes, other_dtypes) + ): + raise TypeError("Cannot do operation with improper dtypes") + return result + + def _validate_function(self, func, on_invalid=None): + """ + Check the validity of the function which is intended to be applied to the frame. + + Parameters + ---------- + func : object + on_invalid : callable(str, cls), optional + Function to call in case invalid `func` is met, `on_invalid` takes an error + message and an exception type as arguments. If not specified raise an + appropriate exception. + **Note:** This parameter is a hack to concord with pandas error types. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + + def error_raiser(msg, exception=Exception): + raise exception(msg) + + if on_invalid is None: + on_invalid = error_raiser + + if isinstance(func, dict): + [self._validate_function(fn, on_invalid) for fn in func.values()] + return + # We also could validate this, but it may be quite expensive for lazy-frames + # if not all(idx in self.axes[axis] for idx in func.keys()): + # error_raiser("Invalid dict keys", KeyError) + + if not is_list_like(func): + func = [func] + + for fn in func: + if isinstance(fn, str): + if not (hasattr(self, fn) or hasattr(np, fn)): + on_invalid( + f"{fn} is not valid function for {type(self)} object.", + AttributeError, + ) + elif not callable(fn): + on_invalid( + f"One of the passed functions has an invalid type: {type(fn)}: {fn}, " + + "only callable or string is acceptable.", + TypeError, + ) + + def _binary_op( + self, + op: str, + other: BasePandasDataset, + axis: Axis, + level: Level | None = None, + fill_value: float | None = None, + **kwargs: Any, + ): + """ + Do binary operation between two datasets. + + Parameters + ---------- + op : str + Name of binary operation. + other : modin.pandas.BasePandasDataset + Second operand of binary operation. + axis: Whether to compare by the index (0 or ‘index’) or columns. (1 or ‘columns’). + level: Broadcast across a level, matching Index values on the passed MultiIndex level. + fill_value: Fill existing missing (NaN) values, and any new element needed for + successful DataFrame alignment, with this value before computation. + If data in both corresponding DataFrame locations is missing the result will be missing. + only arithmetic binary operation has this parameter (e.g., add() has, but eq() doesn't have). + + kwargs can contain the following parameters passed in at the frontend: + func: Only used for `combine` method. Function that takes two series as inputs and + return a Series or a scalar. Used to merge the two dataframes column by columns. + + Returns + ------- + modin.pandas.BasePandasDataset + Result of binary operation. 
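As a quick illustration of the `fill_value` semantics described above (shown with native pandas, whose arithmetic alignment Snowpark pandas mirrors): a value missing on one side is filled before the operation, while a position missing on both sides stays missing.

```
import numpy as np
import pandas as native_pd

a = native_pd.Series([1.0, np.nan, np.nan])
b = native_pd.Series([10.0, 20.0, np.nan])
print(a.add(b, fill_value=0))
# 0    11.0   both present
# 1    20.0   one side missing -> filled with 0 first
# 2     NaN   missing on both sides -> stays missing
```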
+ """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + raise_if_native_pandas_objects(other) + axis = self._get_axis_number(axis) + squeeze_self = isinstance(self, pd.Series) + + # pandas itself will ignore the axis argument when using Series.. + # Per default, it is set to axis=0. However, for the case of a Series interacting with + # a DataFrame the behavior is axis=1. Manually check here for this case and adjust the axis. + if isinstance(self, pd.Series) and isinstance(other, pd.DataFrame): + axis = 1 + + new_query_compiler = self._query_compiler.binary_op( + op=op, + other=other, + axis=axis, + level=level, + fill_value=fill_value, + squeeze_self=squeeze_self, + **kwargs, + ) + return self._create_or_update_from_compiler(new_query_compiler) + + def _default_to_pandas(self, op, *args, **kwargs): + """ + Convert dataset to pandas type and call a pandas function on it. + + Parameters + ---------- + op : str + Name of pandas function. + *args : list + Additional positional arguments to be passed to `op`. + **kwargs : dict + Additional keywords arguments to be passed to `op`. + + Returns + ------- + object + Result of operation. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + args = try_cast_to_pandas(args) + kwargs = try_cast_to_pandas(kwargs) + pandas_obj = self._to_pandas() + if callable(op): + result = op(pandas_obj, *args, **kwargs) + elif isinstance(op, str): + # The inner `getattr` is ensuring that we are treating this object (whether + # it is a DataFrame, Series, etc.) as a pandas object. The outer `getattr` + # will get the operation (`op`) from the pandas version of the class and run + # it on the object after we have converted it to pandas. + attr = getattr(self._pandas_class, op) + if isinstance(attr, property): + result = getattr(pandas_obj, op) + else: + result = attr(pandas_obj, *args, **kwargs) + else: + ErrorMessage.internal_error( + failure_condition=True, + extra_log=f"{op} is an unsupported operation", + ) + # SparseDataFrames cannot be serialized by arrow and cause problems for Modin. + # For now we will use pandas. + if isinstance(result, type(self)) and not isinstance( + result, (pandas.SparseDataFrame, pandas.SparseSeries) + ): + return self._create_or_update_from_compiler( + result, inplace=kwargs.get("inplace", False) + ) + elif isinstance(result, pandas.DataFrame): + from snowflake.snowpark.modin.pandas import DataFrame + + return DataFrame(result) + elif isinstance(result, pandas.Series): + from snowflake.snowpark.modin.pandas import Series + + return Series(result) + # inplace + elif result is None: + return self._create_or_update_from_compiler( + getattr(pd, type(pandas_obj).__name__)(pandas_obj)._query_compiler, + inplace=True, + ) + else: + try: + if ( + isinstance(result, (list, tuple)) + and len(result) == 2 + and isinstance(result[0], pandas.DataFrame) + ): + # Some operations split the DataFrame into two (e.g. align). We need to wrap + # both of the returned results + if isinstance(result[1], pandas.DataFrame): + second = self.__constructor__(result[1]) + else: + second = result[1] + return self.__constructor__(result[0]), second + else: + return result + except TypeError: + return result + + @classmethod + def _get_axis_number(cls, axis): + """ + Convert axis name or number to axis index. + + Parameters + ---------- + axis : int, str or pandas._libs.lib.NoDefault + Axis name ('index' or 'columns') or number to be converted to axis index. 
+ + Returns + ------- + int + 0 or 1 - axis index in the array of axes stored in the dataframe. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + if axis is no_default: + axis = None + + return cls._pandas_class._get_axis_number(axis) if axis is not None else 0 + + @pandas.util.cache_readonly + def __constructor__(self): + """ + Construct DataFrame or Series object depending on self type. + + Returns + ------- + modin.pandas.BasePandasDataset + Constructed object. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return type(self) + + def abs(self): # noqa: RT01, D200 + """ + Return a `BasePandasDataset` with absolute numeric value of each element. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self.__constructor__(query_compiler=self._query_compiler.unary_op("abs")) + + def _to_series_list(self, index: pd.Index) -> list[pd.Series]: + """ + Convert index to a list of series + Args: + index: can be single or multi index + + Returns: + the list of series + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + if isinstance(index, pd.MultiIndex): + return [ + pd.Series(index.get_level_values(level)) + for level in range(index.nlevels) + ] + elif isinstance(index, pd.Index): + return [pd.Series(index)] + + def _set_index(self, new_index: Axes) -> None: + """ + Set the index for this DataFrame. + + Parameters + ---------- + new_index : pandas.Index + The new index to set this. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + self._update_inplace( + new_query_compiler=self._query_compiler.set_index( + [ + s._query_compiler + for s in self._to_series_list(ensure_index(new_index)) + ] + ) + ) + + def set_axis( + self, + labels: IndexLabel, + *, + axis: Axis = 0, + copy: bool | NoDefault = no_default, + ): + """ + Assign desired index to given axis. + """ + # Behavior based on copy: + # ----------------------------------- + # - In native pandas, copy determines whether to create a copy of the data (not DataFrame). + # - We cannot emulate the native pandas' copy behavior in Snowpark since a copy of only data + # cannot be created -- you can only copy the whole object (DataFrame/Series). + # + # Snowpark behavior: + # ------------------ + # - copy is kept for compatibility with native pandas but is ignored. The user is warned that copy is unused. + # Warn user that copy does not do anything. + if copy is not no_default: + WarningMessage.single_warning( + message=f"{type(self).__name__}.set_axis 'copy' keyword is unused and is ignored." + ) + if labels is None: + raise TypeError("None is not a valid value for the parameter 'labels'.") + + # Determine whether to update self or a copy and perform update. + obj = self.copy() + setattr(obj, axis, labels) + return obj + + def _get_index(self): + """ + Get the index for this DataFrame. + + Returns + ------- + pandas.Index + The union of all indexes across the partitions. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self._query_compiler.index + + index = property(_get_index, _set_index) + + def add( + self, other, axis="columns", level=None, fill_value=None + ): # noqa: PR01, RT01, D200 + """ + Return addition of `BasePandasDataset` and `other`, element-wise (binary operator `add`). 
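A small sketch of the `set_axis` behavior described above, assuming the Snowpark pandas plugin is installed and a Snowpark session is active: the method returns a new object, and `copy` is accepted only for compatibility (passing it emits a warning and is otherwise ignored).

```
import snowflake.snowpark.modin.pandas as pd

df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
renamed = df.set_axis(["x", "y"], axis="columns")    # returns a new object; df is unchanged
print(list(renamed.columns))                         # ['x', 'y']
df.set_axis(["x", "y"], axis="columns", copy=False)  # warns that 'copy' is unused
```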
+ """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self._binary_op( + "add", other, axis=axis, level=level, fill_value=fill_value + ) + + def aggregate( + self, func: AggFuncType = None, axis: Axis | None = 0, *args: Any, **kwargs: Any + ): + """ + Aggregate using one or more operations over the specified axis. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + from snowflake.snowpark.modin.pandas import Series + + origin_axis = axis + axis = self._get_axis_number(axis) + + if axis == 1 and isinstance(self, Series): + raise ValueError(f"No axis named {origin_axis} for object type Series") + + if len(self._query_compiler.columns) == 0: + # native pandas raise error with message "no result", here we raise a more readable error. + raise ValueError("No column to aggregate on.") + + func = validate_and_try_convert_agg_func_arg_func_to_str( + agg_func=func, + obj=self, + allow_duplication=False, + axis=axis, + ) + + # This is to stay consistent with pandas result format, when the func is single + # aggregation function in format of callable or str, reduce the result dimension to + # convert dataframe to series, or convert series to scalar. + need_reduce_dimension = ( + (callable(func) or isinstance(func, str)) + # A Series should be returned when a single scalar string/function aggregation function, or a + # dict of scalar string/functions is specified. In all other cases (including if the function + # is a 1-element list), the result is a DataFrame. + # + # The examples below have axis=1, but the same logic is applied for axis=0. + # >>> df = pd.DataFrame({"a": [0, 1], "b": [2, 3]}) + # + # single aggregation: return Series + # >>> df.agg("max", axis=1) + # 0 2 + # 1 3 + # dtype: int64 + # + # list of aggregations: return DF + # >>> df.agg(["max"], axis=1) + # max + # 0 2 + # 1 3 + # + # dict where all aggregations are strings: return Series + # >>> df.agg({1: "max", 0: "min"}, axis=1) + # 1 3 + # 0 0 + # dtype: int64 + # + # dict where one element is a list: return DF + # >>> df.agg({1: "max", 0: ["min"]}, axis=1) + # max min + # 1 3.0 NaN + # 0 NaN 0.0 + or ( + is_dict_like(func) + and all(not is_list_like(value) for value in func.values()) + ) + ) + + # If func is a dict, pandas will not respect kwargs for each aggregation function, and + # we should drop them before passing the to the query compiler. 
+ # + # >>> native_pd.DataFrame({"a": [0, 1], "b": [np.nan, 0]}).agg("max", skipna=False, axis=1) + # 0 NaN + # 1 1.0 + # dtype: float64 + # >>> native_pd.DataFrame({"a": [0, 1], "b": [np.nan, 0]}).agg(["max"], skipna=False, axis=1) + # max + # 0 0.0 + # 1 1.0 + # >>> pd.DataFrame([[np.nan], [0]]).aggregate("count", skipna=True, axis=0) + # 0 1 + # dtype: int8 + # >>> pd.DataFrame([[np.nan], [0]]).count(skipna=True, axis=0) + # TypeError: got an unexpected keyword argument 'skipna' + if is_dict_like(func): + kwargs.clear() + + result = self.__constructor__( + query_compiler=self._query_compiler.agg( + func=func, + axis=axis, + args=args, + kwargs=kwargs, + ) + ) + + if need_reduce_dimension: + if self._is_dataframe: + result = Series(query_compiler=result._query_compiler) + + if isinstance(result, Series): + # When func is just "quantile" with a scalar q, result has quantile value as name + q = kwargs.get("q", 0.5) + if func == "quantile" and is_scalar(q): + result.name = q + else: + result.name = None + + # handle case for single scalar (same as result._reduce_dimension()) + if isinstance(self, Series): + return result.to_pandas().squeeze() + + return result + + agg = aggregate + + def _string_function(self, func, *args, **kwargs): + """ + Execute a function identified by its string name. + + Parameters + ---------- + func : str + Function name to call on `self`. + *args : list + Positional arguments to pass to func. + **kwargs : dict + Keyword arguments to pass to func. + + Returns + ------- + object + Function result. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + assert isinstance(func, str) + f = getattr(self, func, None) + if f is not None: + if callable(f): + return f(*args, **kwargs) + assert len(args) == 0 + assert len([kwarg for kwarg in kwargs if kwarg != "axis"]) == 0 + return f + f = getattr(np, func, None) + if f is not None: + return self._default_to_pandas("agg", func, *args, **kwargs) + raise ValueError(f"{func} is an unknown string function") + + def _get_dtypes(self): + """ + Get dtypes as list. + + Returns + ------- + list + Either a one-element list that contains `dtype` if object denotes a Series + or a list that contains `dtypes` if object denotes a DataFrame. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + if hasattr(self, "dtype"): + return [self.dtype] + else: + return list(self.dtypes) + + def align( + self, + other, + join="outer", + axis=None, + level=None, + copy=None, + fill_value=None, + method=lib.no_default, + limit=lib.no_default, + fill_axis=lib.no_default, + broadcast_axis=lib.no_default, + ): # noqa: PR01, RT01, D200 + """ + Align two objects on their axes with the specified join method. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + ErrorMessage.not_implemented() + return self._default_to_pandas( + "align", + other, + join=join, + axis=axis, + level=level, + copy=copy, + fill_value=fill_value, + method=method, + limit=limit, + fill_axis=fill_axis, + broadcast_axis=broadcast_axis, + ) + + def all(self, axis=0, bool_only=None, skipna=True, **kwargs): + """ + Return whether all elements are True, potentially over an axis. 
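For reference, the reduction behavior of `all` matches native pandas: it reduces per column by default and collapses to a single scalar with ``axis=None``.

```
import pandas as native_pd

df = native_pd.DataFrame({"a": [True, True], "b": [True, False]})
print(df.all())           # per-column: a -> True, b -> False
print(df.all(axis=None))  # reduced over both axes to one scalar: False
```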
+ """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + validate_bool_kwarg(skipna, "skipna", none_allowed=False) + if axis is not None: + axis = self._get_axis_number(axis) + if bool_only and axis == 0: + if hasattr(self, "dtype"): + ErrorMessage.not_implemented( + "{}.{} does not implement numeric_only.".format( + type(self).__name__, "all" + ) + ) # pragma: no cover + data_for_compute = self[self.columns[self.dtypes == np.bool_]] + return data_for_compute.all( + axis=axis, bool_only=False, skipna=skipna, **kwargs + ) + return self._reduce_dimension( + self._query_compiler.all( + axis=axis, bool_only=bool_only, skipna=skipna, **kwargs + ) + ) + else: + if bool_only: + raise ValueError(f"Axis must be 0 or 1 (got {axis})") + # Reduce to a scalar if axis is None. + result = self._reduce_dimension( + # FIXME: Judging by pandas docs `**kwargs` serves only compatibility + # purpose and does not affect the result, we shouldn't pass them to the query compiler. + self._query_compiler.all( + axis=0, + bool_only=bool_only, + skipna=skipna, + **kwargs, + ) + ) + if isinstance(result, BasePandasDataset): + return result.all( + axis=axis, bool_only=bool_only, skipna=skipna, **kwargs + ) + return result + + def any(self, axis=0, bool_only=None, skipna=True, **kwargs): + """ + Return whether any element is True, potentially over an axis. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + validate_bool_kwarg(skipna, "skipna", none_allowed=False) + if axis is not None: + axis = self._get_axis_number(axis) + if bool_only and axis == 0: + if hasattr(self, "dtype"): + ErrorMessage.not_implemented( + "{}.{} does not implement numeric_only.".format( + type(self).__name__, "all" + ) + ) # pragma: no cover + data_for_compute = self[self.columns[self.dtypes == np.bool_]] + return data_for_compute.any( + axis=axis, bool_only=False, skipna=skipna, **kwargs + ) + return self._reduce_dimension( + self._query_compiler.any( + axis=axis, bool_only=bool_only, skipna=skipna, **kwargs + ) + ) + else: + if bool_only: + raise ValueError(f"Axis must be 0 or 1 (got {axis})") + # Reduce to a scalar if axis is None. + result = self._reduce_dimension( + self._query_compiler.any( + axis=0, + bool_only=bool_only, + skipna=skipna, + **kwargs, + ) + ) + if isinstance(result, BasePandasDataset): + return result.any( + axis=axis, bool_only=bool_only, skipna=skipna, **kwargs + ) + return result + + def apply( + self, + func, + axis, + broadcast, + raw, + reduce, + result_type, + convert_dtype, + args, + **kwds, + ): # noqa: PR01, RT01, D200 + """ + Apply a function along an axis of the `BasePandasDataset`. 
+ """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + + def error_raiser(msg, exception): + """Convert passed exception to the same type as pandas do and raise it.""" + # HACK: to concord with pandas error types by replacing all of the + # TypeErrors to the AssertionErrors + exception = exception if exception is not TypeError else AssertionError + raise exception(msg) + + self._validate_function(func, on_invalid=error_raiser) + axis = self._get_axis_number(axis) + # TODO SNOW-864025: Support str in series.apply and df.apply + if isinstance(func, str): + # if axis != 1 function can be bounded to the Series, which doesn't + # support axis parameter + if axis == 1: + kwds["axis"] = axis + result = self._string_function(func, *args, **kwds) + if isinstance(result, BasePandasDataset): + return result._query_compiler + return result + # TODO SNOW-856682: Support dict in series.apply and df.apply + elif isinstance(func, dict): + if len(self.columns) != len(set(self.columns)): + WarningMessage.mismatch_with_pandas( + operation="apply", + message="Duplicate column names not supported with apply().", + ) # pragma: no cover + query_compiler = self._query_compiler.apply( + func, + axis, + args=args, + raw=raw, + result_type=result_type, + **kwds, + ) + return query_compiler + + def asfreq( + self, freq, method=None, how=None, normalize=False, fill_value=None + ): # noqa: PR01, RT01, D200 + """ + Convert time series to specified frequency. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + ErrorMessage.not_implemented() + return self._default_to_pandas( + "asfreq", + freq, + method=method, + how=how, + normalize=normalize, + fill_value=fill_value, + ) + + def asof(self, where, subset=None): # noqa: PR01, RT01, D200 + """ + Return the last row(s) without any NaNs before `where`. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + ErrorMessage.not_implemented() + scalar = not is_list_like(where) + if isinstance(where, pandas.Index): + # Prevent accidental mutation of original: + where = where.copy() + else: + if scalar: + where = [where] + where = pandas.Index(where) + + if subset is None: + data = self + else: + # Only relevant for DataFrames: + data = self[subset] + no_na_index = data.dropna().index + new_index = pandas.Index([no_na_index.asof(i) for i in where]) + result = self.reindex(new_index) + result.index = where + + if scalar: + # Need to return a Series: + result = result.squeeze() + return result + + def astype( + self, + dtype: str | type | pd.Series | dict[str, type], + copy: bool = True, + errors: Literal["raise", "ignore"] = "raise", + ) -> pd.DataFrame | pd.Series: + """ + Cast a Modin object to a specified dtype `dtype`. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + # dtype can be a series, a dict, or a scalar. If it's series or scalar, + # convert it to a dict before passing it to the query compiler. + raise_if_native_pandas_objects(dtype) + from snowflake.snowpark.modin.pandas import Series + + if isinstance(dtype, Series): + dtype = dtype.to_pandas() + if not dtype.index.is_unique: + raise ValueError( + "The new Series of types must have a unique index, i.e. " + + "it must be one-to-one mapping from column names to " + + " their new dtypes." + ) + dtype = dtype.to_dict() + # If we got a series or dict originally, dtype is a dict now. Its keys + # must be column names. + if isinstance(dtype, dict): + # Avoid materializing columns. 
The query compiler will handle errors where + # dtype dict includes keys that are not in columns. + col_dtypes = dtype + for col_name in col_dtypes: + if col_name not in self._query_compiler.columns: + raise KeyError( + "Only a column name can be used for the key in a dtype mappings argument. " + f"'{col_name}' not found in columns." + ) + else: + # Assume that the dtype is a scalar. + col_dtypes = {column: dtype for column in self._query_compiler.columns} + + # ensure values are pandas dtypes + col_dtypes = {k: pandas_dtype(v) for k, v in col_dtypes.items()} + new_query_compiler = self._query_compiler.astype(col_dtypes, errors=errors) + return self._create_or_update_from_compiler(new_query_compiler, not copy) + + @property + def at(self, axis=None): # noqa: PR01, RT01, D200 + """ + Get a single value for a row/column label pair. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + ErrorMessage.not_implemented() + from .indexing import _LocIndexer + + return _LocIndexer(self) + + def at_time(self, time, asof=False, axis=None): # noqa: PR01, RT01, D200 + """ + Select values at particular time of day (e.g., 9:30AM). + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + ErrorMessage.not_implemented() + axis = self._get_axis_number(axis) + idx = self.index if axis == 0 else self.columns + indexer = pandas.Series(index=idx).at_time(time, asof=asof).index + return self.loc[indexer] if axis == 0 else self.loc[:, indexer] + + @_inherit_docstrings( + pandas.DataFrame.between_time, apilink="pandas.DataFrame.between_time" + ) + def between_time( + self: BasePandasDataset, + start_time, + end_time, + inclusive: str | None = None, + axis=None, + ): # noqa: PR01, RT01, D200 + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + ErrorMessage.not_implemented() + axis = self._get_axis_number(axis) + idx = self.index if axis == 0 else self.columns + indexer = ( + pandas.Series(index=idx) + .between_time( + start_time, + end_time, + inclusive=inclusive, + ) + .index + ) + return self.loc[indexer] if axis == 0 else self.loc[:, indexer] + + def bfill( + self, axis=None, inplace=False, limit=None, downcast=None + ): # noqa: PR01, RT01, D200 + """ + Synonym for `DataFrame.fillna` with ``method='bfill'``. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + ErrorMessage.not_implemented() + return self.fillna( + method="bfill", axis=axis, limit=limit, downcast=downcast, inplace=inplace + ) + + backfill = bfill + + def bool(self): # noqa: RT01, D200 + """ + Return the bool of a single element `BasePandasDataset`. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + ErrorMessage.not_implemented() + shape = self.shape + if shape != (1,) and shape != (1, 1): + raise ValueError( + """The PandasObject does not have exactly + 1 element. Return the bool of a single + element PandasObject. The truth value is + ambiguous. Use a.empty, a.item(), a.any() + or a.all().""" + ) + else: + return self._to_pandas().bool() + + def clip( + self, lower=None, upper=None, axis=None, inplace=False, *args, **kwargs + ): # noqa: PR01, RT01, D200 + """ + Trim values at input threshold(s). 
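To illustrate the dict / Series handling in `astype` above (native pandas shown, which enforces the same constraints): a dict casts only the listed columns, and a Series of dtypes is treated as such a mapping, which is why its index must be unique and contain only column names.

```
import pandas as native_pd

df = native_pd.DataFrame({"a": [1, 2], "b": [0.5, 1.5]})
print(df.astype({"a": "float64"}).dtypes)  # only column "a" is cast

template = native_pd.DataFrame({"a": [1.0], "b": [1]})
print(df.astype(template.dtypes).dtypes)   # template.dtypes is a Series of dtypes
```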
+ """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + # validate inputs + ErrorMessage.not_implemented() + if axis is not None: + axis = self._get_axis_number(axis) + self._validate_dtypes(numeric_only=True) + inplace = validate_bool_kwarg(inplace, "inplace") + axis = numpy_compat.function.validate_clip_with_axis(axis, args, kwargs) + # any np.nan bounds are treated as None + if lower is not None and np.any(np.isnan(lower)): + lower = None + if upper is not None and np.any(np.isnan(upper)): + upper = None + if is_list_like(lower) or is_list_like(upper): + if axis is None: + raise ValueError("Must specify axis = 0 or 1") + lower = self._validate_other(lower, axis) + upper = self._validate_other(upper, axis) + # FIXME: Judging by pandas docs `*args` and `**kwargs` serves only compatibility + # purpose and does not affect the result, we shouldn't pass them to the query compiler. + new_query_compiler = self._query_compiler.clip( + lower=lower, upper=upper, axis=axis, inplace=inplace, *args, **kwargs + ) + return self._create_or_update_from_compiler(new_query_compiler, inplace) + + def combine(self, other, func, fill_value=None, **kwargs): # noqa: PR01, RT01, D200 + """ + Perform combination of `BasePandasDataset`-s according to `func`. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + ErrorMessage.not_implemented() + return self._binary_op( + "combine", other, axis=0, func=func, fill_value=fill_value, **kwargs + ) + + def combine_first(self, other): # noqa: PR01, RT01, D200 + """ + Update null elements with value in the same location in `other`. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + ErrorMessage.not_implemented() + return self._binary_op("combine_first", other, axis=0) + + def copy(self, deep: bool = True): + """ + Make a copy of the object's metadata. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + if deep: + return self.__constructor__(query_compiler=self._query_compiler.copy()) + new_obj = self.__constructor__(query_compiler=self._query_compiler) + self._add_sibling(new_obj) + return new_obj + + def count( + self, + axis: Axis | None = 0, + numeric_only: bool = False, + ): + """ + Count non-NA cells for `BasePandasDataset`. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self._agg_helper( + func="count", + axis=axis, + numeric_only=numeric_only, + ) + + def cummax(self, axis=None, skipna=True, *args, **kwargs): # noqa: PR01, RT01, D200 + """ + Return cumulative maximum over a `BasePandasDataset` axis. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + axis = self._get_axis_number(axis) + if axis == 1: + self._validate_dtypes(numeric_only=True) + return self.__constructor__( + # FIXME: Judging by pandas docs `*args` and `**kwargs` serves only compatibility + # purpose and does not affect the result, we shouldn't pass them to the query compiler. + query_compiler=self._query_compiler.cummax( + fold_axis=axis, axis=axis, skipna=skipna, **kwargs + ) + ) + + def cummin(self, axis=None, skipna=True, *args, **kwargs): # noqa: PR01, RT01, D200 + """ + Return cumulative minimum over a `BasePandasDataset` axis. 
+ """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + axis = self._get_axis_number(axis) + if axis == 1: + self._validate_dtypes(numeric_only=True) + return self.__constructor__( + # FIXME: Judging by pandas docs `*args` and `**kwargs` serves only compatibility + # purpose and does not affect the result, we shouldn't pass them to the query compiler. + query_compiler=self._query_compiler.cummin( + fold_axis=axis, axis=axis, skipna=skipna, **kwargs + ) + ) + + def cumprod( + self, axis=None, skipna=True, *args, **kwargs + ): # noqa: PR01, RT01, D200 + """ + Return cumulative product over a `BasePandasDataset` axis. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + axis = self._get_axis_number(axis) + self._validate_dtypes(numeric_only=True) + return self.__constructor__( + # FIXME: Judging by pandas docs `**kwargs` serves only compatibility + # purpose and does not affect the result, we shouldn't pass them to the query compiler. + query_compiler=self._query_compiler.cumprod( + fold_axis=axis, axis=axis, skipna=skipna, **kwargs + ) + ) + + def cumsum(self, axis=None, skipna=True, *args, **kwargs): # noqa: PR01, RT01, D200 + """ + Return cumulative sum over a `BasePandasDataset` axis. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + axis = self._get_axis_number(axis) + self._validate_dtypes(numeric_only=True) + return self.__constructor__( + # FIXME: Judging by pandas docs `*args` and `**kwargs` serves only compatibility + # purpose and does not affect the result, we shouldn't pass them to the query compiler. + query_compiler=self._query_compiler.cumsum( + fold_axis=axis, axis=axis, skipna=skipna, **kwargs + ) + ) + + def describe( + self, + percentiles: ListLike | None = None, + include: ListLike | Literal["all"] | None = None, + exclude: ListLike | None = None, + ) -> BasePandasDataset: + """ + Generate descriptive statistics. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + # Upstream modin uses pandas.core.methods.describe._refine_percentiles for this, + # which is not available in pandas 1.5.X + if percentiles is not None: + # explicit conversion of `percentiles` to list + percentiles = list(percentiles) + + # get them all to be in [0, 1] + validate_percentile(percentiles) + + # median should always be included + if 0.5 not in percentiles: + percentiles.append(0.5) + percentiles = np.asarray(percentiles) + else: + percentiles = np.array([0.25, 0.5, 0.75]) + + data = self + if self._is_dataframe: + # Upstream modin lacks this check because it defaults to pandas for describing empty dataframes + if len(self.columns) == 0: + raise ValueError("Cannot describe a DataFrame without columns") + + # include/exclude are ignored for Series + if (include is None) and (exclude is None): + # when some numerics are found, keep only numerics + default_include: list[npt.DTypeLike] = [np.number] + default_include.append("datetime") + data = self.select_dtypes(include=default_include) + if len(data.columns) == 0: + data = self + elif include == "all": + if exclude is not None: + raise ValueError("exclude must be None when include is 'all'") + data = self + else: + data = self.select_dtypes( + include=include, + exclude=exclude, + ) + # Upstream modin uses data.empty, but that incurs an extra row count query + if self._is_dataframe and len(data.columns) == 0: + # Match pandas error from concatenating empty list of series descriptions. 
+ raise ValueError("No objects to concatenate") + + return self.__constructor__( + query_compiler=data._query_compiler.describe(percentiles=percentiles) + ) + + def diff(self, periods: int = 1, axis: Axis = 0): + """ + First discrete difference of element. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + # We must only accept integer (or float values that are whole numbers) + # for periods. + int_periods = validate_int_kwarg(periods, "periods", float_allowed=True) + axis = self._get_axis_number(axis) + return self.__constructor__( + query_compiler=self._query_compiler.diff(axis=axis, periods=int_periods) + ) + + def drop( + self, + labels: IndexLabel = None, + axis: Axis = 0, + index: IndexLabel = None, + columns: IndexLabel = None, + level: Level = None, + inplace: bool = False, + errors: IgnoreRaise = "raise", + ) -> BasePandasDataset | None: + """ + Drop specified labels from `BasePandasDataset`. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + inplace = validate_bool_kwarg(inplace, "inplace") + if labels is not None: + if index is not None or columns is not None: + raise ValueError("Cannot specify both 'labels' and 'index'/'columns'") + axes = {self._get_axis_number(axis): labels} + elif index is not None or columns is not None: + axes = {0: index, 1: columns} + else: + raise ValueError( + "Need to specify at least one of 'labels', 'index' or 'columns'" + ) + + for axis, labels in axes.items(): + if labels is not None: + if level is not None and not self._query_compiler.has_multiindex( + axis=axis + ): + # Same error as native pandas. + raise AssertionError("axis must be a MultiIndex") + # According to pandas documentation, a tuple will be used as a single + # label and not treated as a list-like. + if not is_list_like(labels) or isinstance(labels, tuple): + axes[axis] = [labels] + + new_query_compiler = self._query_compiler.drop( + index=axes.get(0), columns=axes.get(1), level=level, errors=errors + ) + return self._create_or_update_from_compiler(new_query_compiler, inplace) + + def _dropna( + self, + axis: Axis = 0, + how: str | NoDefault = no_default, + thresh: int | NoDefault = no_default, + subset: IndexLabel = None, + inplace: bool = False, + ): + inplace = validate_bool_kwarg(inplace, "inplace") + + if is_list_like(axis): + raise TypeError("supplying multiple axes to axis is no longer supported.") + + axis = self._get_axis_number(axis) + + if (how is not no_default) and (thresh is not no_default): + raise TypeError( + "You cannot set both the how and thresh arguments at the same time." + ) + + if how is no_default: + how = "any" + if how not in ["any", "all"]: + raise ValueError("invalid how option: %s" % how) + if subset is not None: + if axis == 1: + indices = self.index.get_indexer_for(subset) + check = indices == -1 + if check.any(): + raise KeyError(list(np.compress(check, subset))) + else: + indices = self.columns.get_indexer_for(subset) + check = indices == -1 + if check.any(): + raise KeyError(list(np.compress(check, subset))) + + new_query_compiler = self._query_compiler.dropna( + axis=axis, + how=how, + thresh=thresh, + subset=subset, + ) + return self._create_or_update_from_compiler(new_query_compiler, inplace) + + def droplevel(self, level, axis=0): # noqa: PR01, RT01, D200 + """ + Return `BasePandasDataset` with requested index / column level(s) removed. 
+ """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + ErrorMessage.not_implemented() + axis = self._get_axis_number(axis) + new_axis = self.axes[axis].droplevel(level) + result = self.copy() + if axis == 0: + result.index = new_axis + else: + result.columns = new_axis + return result + + def drop_duplicates( + self, keep="first", inplace=False, **kwargs + ): # noqa: PR01, RT01, D200 + """ + Return `BasePandasDataset` with duplicate rows removed. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + inplace = validate_bool_kwarg(inplace, "inplace") + ignore_index = kwargs.get("ignore_index", False) + subset = kwargs.get("subset", None) + if subset is not None: + if is_list_like(subset): + if not isinstance(subset, list): + subset = list(subset) + else: + subset = [subset] + df = self[subset] + else: + df = self + duplicated = df.duplicated(keep=keep) + result = self[~duplicated] + if ignore_index: + result.index = pandas.RangeIndex(stop=len(result)) + if inplace: + self._update_inplace(result._query_compiler) + else: + return result + + def mask( + self, + cond: BasePandasDataset | Callable | AnyArrayLike, + other: BasePandasDataset | Callable | Scalar | None = np.nan, + inplace: bool = False, + axis: Axis | None = None, + level: Level | None = None, + ): + """ + Replace values where the condition is True. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + # TODO: https://snowflakecomputing.atlassian.net/browse/SNOW-985670 + # will move pre-processing to QC layer. + inplace = validate_bool_kwarg(inplace, "inplace") + if cond is None: + raise ValueError("Array conditional must be same shape as self") + + cond = apply_if_callable(cond, self) + + if isinstance(cond, Callable): + raise NotImplementedError("Do not support callable for 'cond' parameter.") + + from snowflake.snowpark.modin.pandas import Series + + if isinstance(cond, Series): + cond._query_compiler._shape_hint = "column" + if isinstance(self, Series): + self._query_compiler._shape_hint = "column" + if isinstance(other, Series): + other._query_compiler._shape_hint = "column" + + if not isinstance(cond, BasePandasDataset): + cond = get_as_shape_compatible_dataframe_or_series(cond, self) + cond._query_compiler._shape_hint = "array" + + if other is not None: + other = apply_if_callable(other, self) + + if isinstance(other, np.ndarray): + other = get_as_shape_compatible_dataframe_or_series( + other, + self, + shape_mismatch_message="other must be the same shape as self when an ndarray", + ) + other._query_compiler._shape_hint = "array" + + if isinstance(other, BasePandasDataset): + other = other._query_compiler + + query_compiler = self._query_compiler.mask( + cond._query_compiler, + other, + axis, + level, + ) + + return self._create_or_update_from_compiler(query_compiler, inplace) + + def where( + self, + cond: BasePandasDataset | Callable | AnyArrayLike, + other: BasePandasDataset | Callable | Scalar | None = np.nan, + inplace: bool = False, + axis: Axis | None = None, + level: Level | None = None, + ): + """ + Replace values where the condition is False. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + # TODO: SNOW-985670: Refactor `where` and `mask` + # will move pre-processing to QC layer. 
+ inplace = validate_bool_kwarg(inplace, "inplace") + if cond is None: + raise ValueError("Array conditional must be same shape as self") + + cond = apply_if_callable(cond, self) + + if isinstance(cond, Callable): + raise NotImplementedError("Do not support callable for 'cond' parameter.") + + from snowflake.snowpark.modin.pandas import Series + + if isinstance(cond, Series): + cond._query_compiler._shape_hint = "column" + if isinstance(self, Series): + self._query_compiler._shape_hint = "column" + if isinstance(other, Series): + other._query_compiler._shape_hint = "column" + + if not isinstance(cond, BasePandasDataset): + cond = get_as_shape_compatible_dataframe_or_series(cond, self) + cond._query_compiler._shape_hint = "array" + + if other is not None: + other = apply_if_callable(other, self) + + if isinstance(other, np.ndarray): + other = get_as_shape_compatible_dataframe_or_series( + other, + self, + shape_mismatch_message="other must be the same shape as self when an ndarray", + ) + other._query_compiler._shape_hint = "array" + + if isinstance(other, BasePandasDataset): + other = other._query_compiler + + query_compiler = self._query_compiler.where( + cond._query_compiler, + other, + axis, + level, + ) + + return self._create_or_update_from_compiler(query_compiler, inplace) + + def eq(self, other, axis="columns", level=None): # noqa: PR01, RT01, D200 + """ + Get equality of `BasePandasDataset` and `other`, element-wise (binary operator `eq`). + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self._binary_op("eq", other, axis=axis, level=level, dtypes=np.bool_) + + def explode(self, column, ignore_index: bool = False): # noqa: PR01, RT01, D200 + """ + Transform each element of a list-like to a row. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + ErrorMessage.not_implemented() + exploded = self.__constructor__( + query_compiler=self._query_compiler.explode(column) + ) + if ignore_index: + exploded = exploded.reset_index(drop=True) + return exploded + + def ewm( + self, + com: float | None = None, + span: float | None = None, + halflife: float | TimedeltaConvertibleTypes | None = None, + alpha: float | None = None, + min_periods: int | None = 0, + adjust: bool = True, + ignore_na: bool = False, + axis: Axis = 0, + times: str | np.ndarray | BasePandasDataset | None = None, + method: str = "single", + ) -> pandas.core.window.ewm.ExponentialMovingWindow: # noqa: PR01, RT01, D200 + """ + Provide exponentially weighted (EW) calculations. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + ErrorMessage.not_implemented() + return self._default_to_pandas( + "ewm", + com=com, + span=span, + halflife=halflife, + alpha=alpha, + min_periods=min_periods, + adjust=adjust, + ignore_na=ignore_na, + axis=axis, + times=times, + method=method, + ) + + def expanding( + self, min_periods=1, axis=0, method="single" + ): # noqa: PR01, RT01, D200 + """ + Provide expanding window calculations. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + ErrorMessage.not_implemented() + return self._default_to_pandas( + "expanding", + min_periods=min_periods, + axis=axis, + method=method, + ) + + def ffill( + self, + axis: Axis | None = None, + inplace: bool = False, + limit: int | None = None, + downcast: dict | None = None, + ): + """ + Synonym for `DataFrame.fillna` with ``method='ffill'``. 
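For reference, `where` and `mask` above are complements of each other, matching native pandas: `where` keeps values where the condition is True, while `mask` replaces them there.

```
import pandas as native_pd

s = native_pd.Series([1, 2, 3])
print(s.where(s > 1))  # [NaN, 2.0, 3.0]: keeps values where cond is True
print(s.mask(s > 1))   # [1.0, NaN, NaN]: replaces values where cond is True
```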
+ """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self.fillna( + method="ffill", axis=axis, limit=limit, downcast=downcast, inplace=inplace + ) + + pad = ffill + + def fillna( + self, + self_is_series, + value: Hashable | Mapping | pd.Series | pd.DataFrame = None, + method: FillnaOptions | None = None, + axis: Axis | None = None, + inplace: bool = False, + limit: int | None = None, + downcast: dict | None = None, + ): + """ + Fill NA/NaN values using the specified method. + + Parameters + ---------- + self_is_series : bool + If True then self contains a Series object, if False then self contains + a DataFrame object. + value : scalar, dict, Series, or DataFrame, default: None + Value to use to fill holes (e.g. 0), alternately a + dict/Series/DataFrame of values specifying which value to use for + each index (for a Series) or column (for a DataFrame). Values not + in the dict/Series/DataFrame will not be filled. This value cannot + be a list. + method : {'backfill', 'bfill', 'pad', 'ffill', None}, default: None + Method to use for filling holes in reindexed Series + pad / ffill: propagate last valid observation forward to next valid + backfill / bfill: use next valid observation to fill gap. + axis : {None, 0, 1}, default: None + Axis along which to fill missing values. + inplace : bool, default: False + If True, fill in-place. Note: this will modify any + other views on this object (e.g., a no-copy slice for a column in a + DataFrame). + limit : int, default: None + If method is specified, this is the maximum number of consecutive + NaN values to forward/backward fill. In other words, if there is + a gap with more than this number of consecutive NaNs, it will only + be partially filled. If method is not specified, this is the + maximum number of entries along the entire axis where NaNs will be + filled. Must be greater than 0 if not None. + downcast : dict, default: None + A dict of item->dtype of what to downcast if possible, + or the string 'infer' which will try to downcast to an appropriate + equal type (e.g. float64 to int64 if possible). + + Returns + ------- + Series, DataFrame or None + Object with missing values filled or None if ``inplace=True``. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + raise_if_native_pandas_objects(value) + inplace = validate_bool_kwarg(inplace, "inplace") + axis = self._get_axis_number(axis) + if isinstance(value, (list, tuple)): + raise TypeError( + '"value" parameter must be a scalar or dict, but ' + + f'you passed a "{type(value).__name__}"' + ) + if value is None and method is None: + # same as pandas + raise ValueError("Must specify a fill 'value' or 'method'.") + if value is not None and method is not None: + raise ValueError("Cannot specify both 'value' and 'method'.") + if method is not None and method not in ["backfill", "bfill", "pad", "ffill"]: + expecting = "pad (ffill) or backfill (bfill)" + msg = "Invalid fill method. Expecting {expecting}. 
Got {method}".format( + expecting=expecting, method=method + ) + raise ValueError(msg) + if limit is not None: + if not isinstance(limit, int): + raise ValueError("Limit must be an integer") + elif limit <= 0: + raise ValueError("Limit must be greater than 0") + + new_query_compiler = self._query_compiler.fillna( + self_is_series=self_is_series, + value=value, + method=method, + axis=axis, + limit=limit, + downcast=downcast, + ) + return self._create_or_update_from_compiler(new_query_compiler, inplace) + + def filter( + self, items=None, like=None, regex=None, axis=None + ): # noqa: PR01, RT01, D200 + """ + Subset the `BasePandasDataset` rows or columns according to the specified index labels. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + ErrorMessage.not_implemented() + nkw = count_not_none(items, like, regex) + if nkw > 1: + raise TypeError( + "Keyword arguments `items`, `like`, or `regex` are mutually exclusive" + ) + if nkw == 0: + raise TypeError("Must pass either `items`, `like`, or `regex`") + if axis is None: + axis = "columns" # This is the default info axis for dataframes + + axis = self._get_axis_number(axis) + labels = self.columns if axis else self.index + + if items is not None: + bool_arr = labels.isin(items) + elif like is not None: + + def f(x): + return like in str(x) + + bool_arr = labels.map(f).tolist() + else: + + def f(x): + return matcher.search(str(x)) is not None + + matcher = re.compile(regex) + bool_arr = labels.map(f).tolist() + if not axis: + return self[bool_arr] + return self[self.columns[bool_arr]] + + def first(self, offset): # noqa: PR01, RT01, D200 + """ + Select initial periods of time series data based on a date offset. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self.loc[pandas.Series(index=self.index).first(offset).index] + + def first_valid_index(self) -> Scalar | tuple[Scalar]: + """ + Return index for first non-NA value or None, if no non-NA value is found. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self._query_compiler.first_valid_index() + + def floordiv( + self, other, axis="columns", level=None, fill_value=None + ): # noqa: PR01, RT01, D200 + """ + Get integer division of `BasePandasDataset` and `other`, element-wise (binary operator `floordiv`). + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self._binary_op( + "floordiv", other, axis=axis, level=level, fill_value=fill_value + ) + + def ge(self, other, axis="columns", level=None): # noqa: PR01, RT01, D200 + """ + Get greater than or equal comparison of `BasePandasDataset` and `other`, element-wise (binary operator `ge`). + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self._binary_op("ge", other, axis=axis, level=level, dtypes=np.bool_) + + def get(self, key, default=None): # noqa: PR01, RT01, D200 + """ + Get item from object for given key. + """ + try: + return self.__getitem__(key) + except (KeyError, ValueError, IndexError): + return default + + def gt(self, other, axis="columns", level=None): # noqa: PR01, RT01, D200 + """ + Get greater than comparison of `BasePandasDataset` and `other`, element-wise (binary operator `gt`). + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self._binary_op("gt", other, axis=axis, level=level, dtypes=np.bool_) + + def head(self, n: int = 5): + """ + Return the first `n` rows. 
+ """ + return self.iloc[:n] + + @property + def iat(self, axis=None): # noqa: PR01, RT01, D200 + """ + Get a single value for a row/column pair by integer position. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + ErrorMessage.not_implemented() + from .indexing import _iLocIndexer + + return _iLocIndexer(self) + + def idxmax(self, axis=0, skipna=True, numeric_only=False): # noqa: PR01, RT01, D200 + """ + Return index of first occurrence of maximum over requested axis. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + dtypes = self._get_dtypes() + if ( + axis == 1 + and not numeric_only + and any(not is_numeric_dtype(d) for d in dtypes) + and len(set(dtypes)) > 1 + ): + # For numeric_only=False, if we have any non-numeric dtype, e.g. + # a string type, we need every other column to be of the same type. + # We can't compare two objects of different non-numeric types, e.g. + # a string and a timestamp. + # If we have only numeric data, we can compare columns even if they + # different types, e.g. we can compare an int column to a float + # column. + raise TypeError("'>' not supported for these dtypes") + axis = self._get_axis_number(axis) + return self._reduce_dimension( + self._query_compiler.idxmax( + axis=axis, skipna=skipna, numeric_only=numeric_only + ) + ) + + def idxmin(self, axis=0, skipna=True, numeric_only=False): # noqa: PR01, RT01, D200 + """ + Return index of first occurrence of minimum over requested axis. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + dtypes = self._get_dtypes() + if ( + axis == 1 + and not numeric_only + and any(not is_numeric_dtype(d) for d in dtypes) + and len(set(dtypes)) > 1 + ): + # For numeric_only=False, if we have any non-numeric dtype, e.g. + # a string type, we need every other column to be of the same type. + # We can't compare two objects of different non-numeric types, e.g. + # a string and a timestamp. + # If we have only numeric data, we can compare columns even if they + # different types, e.g. we can compare an int column to a float + # column. + raise TypeError("'<' not supported for these dtypes") + axis = self._get_axis_number(axis) + return self._reduce_dimension( + self._query_compiler.idxmin( + axis=axis, skipna=skipna, numeric_only=numeric_only + ) + ) + + def infer_objects( + self, copy: bool | None = None + ) -> BasePandasDataset: # pragma: no cover # noqa: RT01, D200 + """ + Attempt to infer better dtypes for object columns. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + # This method is currently overriden in dataframe_overrides.py and series_overrides.py + # and raises NotImplementedError + new_query_compiler = self._query_compiler.infer_objects() + return self._create_or_update_from_compiler( + new_query_compiler, inplace=False if copy is None else not copy + ) + + def convert_dtypes( + self, + infer_objects: bool = True, + convert_string: bool = True, + convert_integer: bool = True, + convert_boolean: bool = True, + convert_floating: bool = True, + dtype_backend: DtypeBackend = "numpy_nullable", + ): # noqa: PR01, RT01, D200 + """ + Convert columns to best possible dtypes using dtypes supporting ``pd.NA``. 
+ """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self.__constructor__( + query_compiler=self._query_compiler.convert_dtypes( + infer_objects=infer_objects, + convert_string=convert_string, + convert_integer=convert_integer, + convert_boolean=convert_boolean, + convert_floating=convert_floating, + dtype_backend=dtype_backend, + ) + ) + + def isin( + self, values: BasePandasDataset | ListLike | dict[Hashable, ListLike] + ) -> BasePandasDataset: # noqa: PR01, RT01, D200 + """ + Whether elements in `BasePandasDataset` are contained in `values`. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + + # Pass as query compiler if values is BasePandasDataset. + if isinstance(values, BasePandasDataset): + values = values._query_compiler + + # Convert non-dict values to List if values is neither List[Any] nor np.ndarray. SnowflakeQueryCompiler + # expects for the non-lazy case, where values is not a BasePandasDataset, the data to be materialized + # as list or numpy array. Because numpy may perform implicit type conversions, use here list to be more general. + elif not isinstance(values, dict) and ( + not isinstance(values, list) or not isinstance(values, np.ndarray) + ): + values = list(values) + + return self.__constructor__( + query_compiler=self._query_compiler.isin(values=values) + ) + + def isna(self): # noqa: RT01, D200 + """ + Detect missing values. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self.__constructor__(query_compiler=self._query_compiler.isna()) + + isnull = isna + + @property + def iloc(self): + """ + Purely integer-location based indexing for selection by position. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + # TODO: SNOW-930028 enable all skipped doctests + from .indexing import _iLocIndexer + + return _iLocIndexer(self) + + def kurt(self, axis=no_default, skipna=True, numeric_only=False, **kwargs): + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + ErrorMessage.not_implemented() + validate_bool_kwarg(skipna, "skipna", none_allowed=False) + axis = self._get_axis_number(axis) + if numeric_only is not None and not numeric_only: + self._validate_dtypes(numeric_only=True) + + data = ( + self._get_numeric_data(axis) + if numeric_only is None or numeric_only + else self + ) + + return self._reduce_dimension( + data._query_compiler.kurt( + axis=axis, + skipna=skipna, + numeric_only=numeric_only, + **kwargs, + ) + ) + + kurtosis = kurt + + def last(self, offset): # noqa: PR01, RT01, D200 + """ + Select final periods of time series data based on a date offset. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self.loc[pandas.Series(index=self.index).last(offset).index] + + def last_valid_index(self) -> Scalar | tuple[Scalar]: + """ + Return index for last non-NA value or None, if no non-NA value is found. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self._query_compiler.last_valid_index() + + def le(self, other, axis="columns", level=None): # noqa: PR01, RT01, D200 + """ + Get less than or equal comparison of `BasePandasDataset` and `other`, element-wise (binary operator `le`). 
+ """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self._binary_op("le", other, axis=axis, level=level, dtypes=np.bool_) + + def lt(self, other, axis="columns", level=None): # noqa: PR01, RT01, D200 + """ + Get less than comparison of `BasePandasDataset` and `other`, element-wise (binary operator `lt`). + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self._binary_op("lt", other, axis=axis, level=level, dtypes=np.bool_) + + @property + def loc(self): + """ + Get a group of rows and columns by label(s) or a boolean array. + """ + # TODO: SNOW-935444 fix doctest where index key has name + # TODO: SNOW-933782 fix multiindex transpose bug, e.g., Name: (cobra, mark ii) => Name: ('cobra', 'mark ii') + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + from .indexing import _LocIndexer + + return _LocIndexer(self) + + def _agg_helper( + self, + func: str, + skipna: bool = True, + axis: int | None | NoDefault = no_default, + numeric_only: bool = False, + **kwargs: Any, + ): + if not self._is_dataframe and numeric_only and not is_numeric_dtype(self.dtype): + # Series aggregations on non-numeric data do not support numeric_only: + # https://github.com/pandas-dev/pandas/blob/cece8c6579854f6b39b143e22c11cac56502c4fd/pandas/core/series.py#L6358 + raise TypeError( + f"Series.{func} does not allow numeric_only=True with non-numeric dtypes." + ) + axis = self._get_axis_number(axis) + numeric_only = validate_bool_kwarg( + numeric_only, "numeric_only", none_allowed=True + ) + skipna = validate_bool_kwarg(skipna, "skipna", none_allowed=False) + agg_kwargs: dict[str, Any] = { + "numeric_only": numeric_only, + "skipna": skipna, + } + agg_kwargs.update(kwargs) + return self.aggregate(func=func, axis=axis, **agg_kwargs) + + def max( + self, + axis: Axis | None = 0, + skipna: bool = True, + numeric_only: bool = False, + **kwargs: Any, + ): + """ + Return the maximum of the values over the requested axis. + """ + return self._agg_helper( + func="max", + axis=axis, + skipna=skipna, + numeric_only=numeric_only, + **kwargs, + ) + + def _stat_operation( + self, + op_name: str, + axis: int | str, + skipna: bool, + numeric_only: bool = False, + **kwargs, + ): + """ + Do common statistic reduce operations under frame. + + Parameters + ---------- + op_name : str + Name of method to apply. + axis : int or str + Axis to apply method on. + skipna : bool + Exclude NA/null values when computing the result. + numeric_only : bool + Include only float, int, boolean columns. + **kwargs : dict + Additional keyword arguments to pass to `op_name`. 
+ + Returns + ------- + scalar or Series + `scalar` - self is Series + `Series` - self is DataFrame + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + axis = self._get_axis_number(axis) + validate_bool_kwarg(skipna, "skipna", none_allowed=False) + if not numeric_only: + self._validate_dtypes(numeric_only=True) + + data = self._get_numeric_data(axis) if numeric_only else self + result_qc = getattr(data._query_compiler, op_name)( + axis=axis, + skipna=skipna, + numeric_only=numeric_only, + **kwargs, + ) + result_qc = self._reduce_dimension(result_qc) + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + # This pattern is seen throughout this file so we should try to correct it + # when we have a more general way of resetting the name to None + from snowflake.snowpark.modin.pandas import Series + + if isinstance(result_qc, Series): + result_qc.name = None + return result_qc + + def memory_usage(self, index=True, deep=False): # noqa: PR01, RT01, D200 + """ + Return the memory usage of the `BasePandasDataset`. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self._reduce_dimension( + self._query_compiler.memory_usage(index=index, deep=deep) + ) + + def min( + self, + axis: Axis | None | NoDefault = no_default, + skipna: bool = True, + numeric_only: bool = False, + **kwargs, + ): + """ + Return the minimum of the values over the requested axis. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self._agg_helper( + func="min", + axis=axis, + skipna=skipna, + numeric_only=numeric_only, + **kwargs, + ) + + def mod( + self, other, axis="columns", level=None, fill_value=None + ): # noqa: PR01, RT01, D200 + """ + Get modulo of `BasePandasDataset` and `other`, element-wise (binary operator `mod`). + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self._binary_op( + "mod", other, axis=axis, level=level, fill_value=fill_value + ) + + def mode(self, axis=0, numeric_only=False, dropna=True): # noqa: PR01, RT01, D200 + """ + Get the mode(s) of each element along the selected axis. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + ErrorMessage.not_implemented() + axis = self._get_axis_number(axis) + return self.__constructor__( + query_compiler=self._query_compiler.mode( + axis=axis, numeric_only=numeric_only, dropna=dropna + ) + ) + + def mul( + self, other, axis="columns", level=None, fill_value=None + ): # noqa: PR01, RT01, D200 + """ + Get multiplication of `BasePandasDataset` and `other`, element-wise (binary operator `mul`). + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self._binary_op( + "mul", other, axis=axis, level=level, fill_value=fill_value + ) + + multiply = mul + + def ne(self, other, axis="columns", level=None): # noqa: PR01, RT01, D200 + """ + Get Not equal comparison of `BasePandasDataset` and `other`, element-wise (binary operator `ne`). + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self._binary_op("ne", other, axis=axis, level=level, dtypes=np.bool_) + + def notna(self): # noqa: RT01, D200 + """ + Detect existing (non-missing) values. 
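+
+ Examples
+ --------
+ Minimal sketch; output mirrors native pandas and is excluded from doctests.
+
+ >>> s = pd.Series([1.0, None, 3.0])  # doctest: +SKIP
+ >>> s.notna()  # doctest: +SKIP
+ 0     True
+ 1    False
+ 2     True
+ dtype: bool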
+ """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self.__constructor__(query_compiler=self._query_compiler.notna()) + + notnull = notna + + def nunique(self, axis=0, dropna=True): # noqa: PR01, RT01, D200 + """ + Return number of unique elements in the `BasePandasDataset`. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + from snowflake.snowpark.modin.pandas import Series + + axis = self._get_axis_number(axis) + result = self._reduce_dimension( + self._query_compiler.nunique(axis=axis, dropna=dropna) + ) + if isinstance(result, Series): + result.name = None + return result + + def pct_change( + self, periods=1, fill_method="pad", limit=None, freq=None, **kwargs + ): # noqa: PR01, RT01, D200 + """ + Percentage change between the current and a prior element. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + ErrorMessage.not_implemented() + return self._default_to_pandas( + "pct_change", + periods=periods, + fill_method=fill_method, + limit=limit, + freq=freq, + **kwargs, + ) + + def pipe(self, func, *args, **kwargs): # noqa: PR01, RT01, D200 + """ + Apply chainable functions that expect `BasePandasDataset`. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + ErrorMessage.not_implemented() + return pipe(self, func, *args, **kwargs) + + def pop(self, item): # noqa: PR01, RT01, D200 + """ + Return item and drop from frame. Raise KeyError if not found. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + ErrorMessage.not_implemented() + result = self[item] + del self[item] + return result + + def pow( + self, other, axis="columns", level=None, fill_value=None + ): # noqa: PR01, RT01, D200 + """ + Get exponential power of `BasePandasDataset` and `other`, element-wise (binary operator `pow`). + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self._binary_op( + "pow", other, axis=axis, level=level, fill_value=fill_value + ) + + def quantile( + self, + q: Scalar | ListLike = 0.5, + axis: Axis = 0, + numeric_only: bool = False, + interpolation: Literal[ + "linear", "lower", "higher", "midpoint", "nearest" + ] = "linear", + method: Literal["single", "table"] = "single", + ) -> float | BasePandasDataset: + """ + Return values at the given quantile over requested axis. 
+ """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + axis = self._get_axis_number(axis) + + # TODO + # - SNOW-1008361: support axis=1 + # - SNOW-1008367: support when q is Snowpandas DF/Series (need to require QC interface to accept QC q values) + # - SNOW-1003587: support datetime/timedelta columns + + if ( + axis == 1 + or interpolation not in ["linear", "nearest"] + or method != "single" + ): + ErrorMessage.not_implemented( + f"quantile function with parameters axis={axis}, interpolation={interpolation}, method={method} not supported" + ) + + if not numeric_only: + # If not numeric_only and columns, then check all columns are either + # numeric, timestamp, or timedelta + # Check if dtype is numeric, timedelta ("m"), or datetime ("M") + if not axis and not all( + is_numeric_dtype(t) or lib.is_np_dtype(t, "mM") + for t in self._get_dtypes() + ): + raise TypeError("can't multiply sequence by non-int of type 'float'") + # If over rows, then make sure that all dtypes are equal for not + # numeric_only + elif axis: + for i in range(1, len(self._get_dtypes())): + pre_dtype = self._get_dtypes()[i - 1] + curr_dtype = self._get_dtypes()[i] + if not is_dtype_equal(pre_dtype, curr_dtype): + raise TypeError( + "Cannot compare type '{}' with type '{}'".format( + pre_dtype, curr_dtype + ) + ) + else: + # Normally pandas returns this near the end of the quantile, but we + # can't afford the overhead of running the entire operation before + # we error. + if not any(is_numeric_dtype(t) for t in self._get_dtypes()): + raise ValueError("need at least one array to concatenate") + + # check that all qs are between 0 and 1 + validate_percentile(q) + axis = self._get_axis_number(axis) + query_compiler = self._query_compiler.quantiles_along_axis0( + q=q if is_list_like(q) else [q], + numeric_only=numeric_only, + interpolation=interpolation, + method=method, + ) + if is_list_like(q): + return self.__constructor__(query_compiler=query_compiler) + else: + # result is either a scalar or Series + result = self._reduce_dimension(query_compiler.transpose_single_row()) + if isinstance(result, BasePandasDataset): + result.name = q + return result + + @_inherit_docstrings(pandas.DataFrame.rank, apilink="pandas.DataFrame.rank") + def rank( + self, + axis=0, + method: str = "average", + numeric_only: bool = False, + na_option: str = "keep", + ascending: bool = True, + pct: bool = False, + ): + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + axis = self._get_axis_number(axis) + return self.__constructor__( + query_compiler=self._query_compiler.rank( + axis=axis, + method=method, + numeric_only=numeric_only, + na_option=na_option, + ascending=ascending, + pct=pct, + ) + ) + + def _copy_index_metadata(self, source, destination): # noqa: PR01, RT01, D200 + """ + Copy Index metadata from `source` to `destination` inplace. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + if hasattr(source, "name") and hasattr(destination, "name"): + destination.name = source.name + if hasattr(source, "names") and hasattr(destination, "names"): + destination.names = source.names + return destination + + def _ensure_index(self, index_like, axis=0): # noqa: PR01, RT01, D200 + """ + Ensure that we have an index from some index-like object. 
+ """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + if ( + self._query_compiler.has_multiindex(axis=axis) + and not isinstance(index_like, pandas.Index) + and is_list_like(index_like) + and len(index_like) > 0 + and isinstance(index_like[0], tuple) + ): + try: + return pandas.MultiIndex.from_tuples(index_like) + except TypeError: + # not all tuples + pass + return ensure_index(index_like) + + def reindex( + self, + index=None, + columns=None, + copy=True, + **kwargs, + ): # noqa: PR01, RT01, D200 + """ + Conform `BasePandasDataset` to new index with optional filling logic. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + ErrorMessage.not_implemented() # pragma: no cover + + new_query_compiler = None + if index is not None: + if not isinstance(index, pandas.Index) or not index.equals(self.index): + new_query_compiler = self._query_compiler.reindex( + axis=0, labels=index, **kwargs + ) + if new_query_compiler is None: + new_query_compiler = self._query_compiler + final_query_compiler = None + if columns is not None: + if not isinstance(index, pandas.Index) or not columns.equals(self.columns): + final_query_compiler = new_query_compiler.reindex( + axis=1, labels=columns, **kwargs + ) + if final_query_compiler is None: + final_query_compiler = new_query_compiler + return self._create_or_update_from_compiler( + final_query_compiler, inplace=False if copy is None else not copy + ) + + def reindex_like( + self, other, method=None, copy=True, limit=None, tolerance=None + ): # noqa: PR01, RT01, D200 + """ + Return an object with matching indices as `other` object. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + ErrorMessage.not_implemented() + return self._default_to_pandas( + "reindex_like", + other, + method=method, + copy=copy, + limit=limit, + tolerance=tolerance, + ) + + def rename_axis( + self, + mapper=lib.no_default, + *, + index=lib.no_default, + columns=lib.no_default, + axis=0, + copy=None, + inplace=False, + ): # noqa: PR01, RT01, D200 + """ + Set the name of the axis for the index or columns. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + axes = {"index": index, "columns": columns} + + if copy is None: + copy = True + + if axis is not None: + axis = self._get_axis_number(axis) + else: + axis = 0 + + inplace = validate_bool_kwarg(inplace, "inplace") + + if mapper is not lib.no_default and mapper is not None: + # Use v0.23 behavior if a scalar or list + non_mapper = is_scalar(mapper) or ( + is_list_like(mapper) and not is_dict_like(mapper) + ) + if non_mapper: + return self._set_axis_name(mapper, axis=axis, inplace=inplace) + else: + raise ValueError("Use `.rename` to alter labels with a mapper.") + else: + # Use new behavior. 
Means that index and/or columns is specified + result = self if inplace else self.copy(deep=copy) + + for axis in range(self.ndim): + v = axes.get(pandas.DataFrame._get_axis_name(axis)) + if v is lib.no_default: + continue + non_mapper = is_scalar(v) or (is_list_like(v) and not is_dict_like(v)) + if non_mapper: + newnames = v + else: + + def _get_rename_function(mapper): + if isinstance(mapper, (dict, BasePandasDataset)): + + def f(x): + if x in mapper: + return mapper[x] + else: + return x + + else: + f = mapper + + return f + + f = _get_rename_function(v) + curnames = self.index.names if axis == 0 else self.columns.names + newnames = [f(name) for name in curnames] + result._set_axis_name(newnames, axis=axis, inplace=True) + if not inplace: + return result + + def reorder_levels(self, order, axis=0): # noqa: PR01, RT01, D200 + """ + Rearrange index levels using input order. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + ErrorMessage.not_implemented() + axis = self._get_axis_number(axis) + new_labels = self.axes[axis].reorder_levels(order) + return self.set_axis(new_labels, axis=axis) + + def resample( + self, + rule, + axis: Axis = lib.no_default, + closed: str | None = None, + label: str | None = None, + convention: str = "start", + kind: str | None = None, + on: Level = None, + level: Level = None, + origin: str | TimestampConvertibleTypes = "start_day", + offset: TimedeltaConvertibleTypes | None = None, + group_keys=no_default, + ): # noqa: PR01, RT01, D200 + """ + Resample time-series data. + """ + from .resample import Resampler + + if axis is not lib.no_default: # pragma: no cover + axis = self._get_axis_number(axis) + if axis == 1: + warnings.warn( + "DataFrame.resample with axis=1 is deprecated. Do " + + "`frame.T.resample(...)` without axis instead.", + FutureWarning, + stacklevel=1, + ) + else: + warnings.warn( + f"The 'axis' keyword in {type(self).__name__}.resample is " + + "deprecated and will be removed in a future version.", + FutureWarning, + stacklevel=1, + ) + else: + axis = 0 + + return Resampler( + dataframe=self, + rule=rule, + axis=axis, + closed=closed, + label=label, + convention=convention, + kind=kind, + on=on, + level=level, + origin=origin, + offset=offset, + group_keys=group_keys, + ) + + def reset_index( + self, + level: IndexLabel = None, + drop: bool = False, + inplace: bool = False, + col_level: Hashable = 0, + col_fill: Hashable = "", + allow_duplicates=no_default, + names: Hashable | Sequence[Hashable] = None, + ): + """ + Reset the index, or a level of it. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + inplace = validate_bool_kwarg(inplace, "inplace") + if allow_duplicates is no_default: + allow_duplicates = False + new_query_compiler = self._query_compiler.reset_index( + drop=drop, + level=level, + col_level=col_level, + col_fill=col_fill, + allow_duplicates=allow_duplicates, + names=names, + ) + return self._create_or_update_from_compiler(new_query_compiler, inplace) + + def radd( + self, other, axis="columns", level=None, fill_value=None + ): # noqa: PR01, RT01, D200 + """ + Return addition of `BasePandasDataset` and `other`, element-wise (binary operator `radd`). 
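+
+ Examples
+ --------
+ Minimal sketch (``s.radd(10)`` computes ``10 + s``); output mirrors native
+ pandas and is excluded from doctests.
+
+ >>> s = pd.Series([1, 2, 3])  # doctest: +SKIP
+ >>> s.radd(10)  # doctest: +SKIP
+ 0    11
+ 1    12
+ 2    13
+ dtype: int64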
+ """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self._binary_op( + "radd", other, axis=axis, level=level, fill_value=fill_value + ) + + def rfloordiv( + self, other, axis="columns", level=None, fill_value=None + ): # noqa: PR01, RT01, D200 + """ + Get integer division of `BasePandasDataset` and `other`, element-wise (binary operator `rfloordiv`). + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self._binary_op( + "rfloordiv", other, axis=axis, level=level, fill_value=fill_value + ) + + def rmod( + self, other, axis="columns", level=None, fill_value=None + ): # noqa: PR01, RT01, D200 + """ + Get modulo of `BasePandasDataset` and `other`, element-wise (binary operator `rmod`). + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self._binary_op( + "rmod", other, axis=axis, level=level, fill_value=fill_value + ) + + def rmul( + self, other, axis="columns", level=None, fill_value=None + ): # noqa: PR01, RT01, D200 + """ + Get Multiplication of dataframe and other, element-wise (binary operator `rmul`). + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self._binary_op( + "rmul", other, axis=axis, level=level, fill_value=fill_value + ) + + def rolling( + self, + window, + min_periods: int | None = None, + center: bool = False, + win_type: str | None = None, + on: str | None = None, + axis: Axis = lib.no_default, + closed: str | None = None, + step: int | None = None, + method: str = "single", + ): # noqa: PR01, RT01, D200 + """ + Provide rolling window calculations. + """ + if axis is not lib.no_default: + axis = self._get_axis_number(axis) + name = "rolling" + if axis == 1: + warnings.warn( + f"Support for axis=1 in {type(self).__name__}.{name} is " + + "deprecated and will be removed in a future version. " + + f"Use obj.T.{name}(...) instead", + FutureWarning, + stacklevel=1, + ) + else: # pragma: no cover + warnings.warn( + f"The 'axis' keyword in {type(self).__name__}.{name} is " + + "deprecated and will be removed in a future version. " + + "Call the method without the axis keyword instead.", + FutureWarning, + stacklevel=1, + ) + else: + axis = 0 + + if win_type is not None: + from .window import Window + + return Window( + self, + window=window, + min_periods=min_periods, + center=center, + win_type=win_type, + on=on, + axis=axis, + closed=closed, + step=step, + method=method, + ) + from .window import Rolling + + return Rolling( + self, + window=window, + min_periods=min_periods, + center=center, + win_type=win_type, + on=on, + axis=axis, + closed=closed, + step=step, + method=method, + ) + + def round(self, decimals=0, *args, **kwargs): # noqa: PR01, RT01, D200 + """ + Round a `BasePandasDataset` to a variable number of decimal places. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + # FIXME: Judging by pandas docs `*args` and `**kwargs` serves only compatibility + # purpose and does not affect the result, we shouldn't pass them to the query compiler. + return self.__constructor__( + query_compiler=self._query_compiler.round(decimals=decimals, **kwargs) + ) + + def rpow( + self, other, axis="columns", level=None, fill_value=None + ): # noqa: PR01, RT01, D200 + """ + Get exponential power of `BasePandasDataset` and `other`, element-wise (binary operator `rpow`). 
+ """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self._binary_op( + "rpow", other, axis=axis, level=level, fill_value=fill_value + ) + + def rsub( + self, other, axis="columns", level=None, fill_value=None + ): # noqa: PR01, RT01, D200 + """ + Get subtraction of `BasePandasDataset` and `other`, element-wise (binary operator `rsub`). + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self._binary_op( + "rsub", other, axis=axis, level=level, fill_value=fill_value + ) + + def rtruediv( + self, other, axis="columns", level=None, fill_value=None + ): # noqa: PR01, RT01, D200 + """ + Get floating division of `BasePandasDataset` and `other`, element-wise (binary operator `rtruediv`). + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self._binary_op( + "rtruediv", other, axis=axis, level=level, fill_value=fill_value + ) + + rdiv = rtruediv + + def sample( + self, + n: int | None = None, + frac: float | None = None, + replace: bool = False, + weights: str | np.ndarray | None = None, + random_state: RandomState | None = None, + axis: Axis | None = None, + ignore_index: bool = False, + ): + """ + Return a random sample of items from an axis of object. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + if self._get_axis_number(axis): + if weights is not None and isinstance(weights, str): + raise ValueError( + "Strings can only be passed to weights when sampling from rows on a DataFrame" + ) + else: + if n is None and frac is None: + n = 1 + elif n is not None and frac is not None: + raise ValueError("Please enter a value for `frac` OR `n`, not both") + else: + if n is not None: + if n < 0: + raise ValueError( + "A negative number of rows requested. Please provide `n` >= 0." + ) + if n % 1 != 0: + raise ValueError("Only integers accepted as `n` values") + else: + if frac < 0: + raise ValueError( + "A negative number of rows requested. Please provide `frac` >= 0." + ) + + query_compiler = self._query_compiler.sample( + n, frac, replace, weights, random_state, axis, ignore_index + ) + return self.__constructor__(query_compiler=query_compiler) + + def sem( + self, + axis: Axis | None = None, + skipna: bool = True, + ddof: int = 1, + numeric_only=False, + **kwargs, + ): # noqa: PR01, RT01, D200 + """ + Return unbiased standard error of the mean over requested axis. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + ErrorMessage.not_implemented() + return self._stat_operation( + "sem", axis, skipna, numeric_only, ddof=ddof, **kwargs + ) + + def mean( + self, + axis: Axis | None | NoDefault = no_default, + skipna: bool = True, + numeric_only: bool = False, + **kwargs: Any, + ): + """ + Return the mean of the values over the requested axis. + """ + return self._agg_helper( + func="mean", + axis=axis, + skipna=skipna, + numeric_only=numeric_only, + **kwargs, + ) + + def median( + self, + axis: Axis | None | NoDefault = no_default, + skipna: bool = True, + numeric_only: bool = False, + **kwargs: Any, + ): + """ + Return the mean of the values over the requested axis. + """ + return self._agg_helper( + func="median", + axis=axis, + skipna=skipna, + numeric_only=numeric_only, + **kwargs, + ) + + def set_flags( + self, *, copy: bool = False, allows_duplicate_labels: bool | None = None + ): # noqa: PR01, RT01, D200 + """ + Return a new `BasePandasDataset` with updated flags. 
+ """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + ErrorMessage.not_implemented() + return self._default_to_pandas( + pandas.DataFrame.set_flags, + copy=copy, + allows_duplicate_labels=allows_duplicate_labels, + ) + + @property + def flags(self): + return self._default_to_pandas(lambda df: df.flags) + + def shift( + self, + periods: int = 1, + freq=None, + axis: Axis = 0, + fill_value: Hashable = no_default, + ) -> BasePandasDataset: + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + if periods == 0 and freq is None: + # Check obvious case first, freq manipulates the index even for periods == 0 so check for it in addition. + return self.copy() + + # pandas compatible ValueError for freq='infer' + # TODO: Test as part of SNOW-1023324. + if freq == "infer": # pragma: no cover + if not hasattr(self, "freq") and not hasattr( # pragma: no cover + self, "inferred_freq" # pragma: no cover + ): # pragma: no cover + raise ValueError() # pragma: no cover + + axis = self._get_axis_number(axis) + + if fill_value == no_default: + fill_value = None + + new_query_compiler = self._query_compiler.shift(periods, freq, axis, fill_value) + return self._create_or_update_from_compiler(new_query_compiler, False) + + def skew( + self, + axis: Axis | None | NoDefault = no_default, + skipna: bool = True, + numeric_only=True, + **kwargs, + ): # noqa: PR01, RT01, D200 + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + """ + Return unbiased skew over requested axis. + """ + return self._stat_operation("skew", axis, skipna, numeric_only, **kwargs) + + def sort_index( + self, + axis=0, + level=None, + ascending=True, + inplace=False, + kind="quicksort", + na_position="last", + sort_remaining=True, + ignore_index: bool = False, + key: IndexKeyFunc | None = None, + ): # noqa: PR01, RT01, D200 + """ + Sort object by labels (along an axis). + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + # pandas throws this exception. See pandas issue #39434 + if ascending is None: + raise ValueError( + "the `axis` parameter is not supported in the pandas implementation of argsort()" + ) + axis = self._get_axis_number(axis) + inplace = validate_bool_kwarg(inplace, "inplace") + new_query_compiler = self._query_compiler.sort_index( + axis=axis, + level=level, + ascending=ascending, + inplace=inplace, + kind=kind, + na_position=na_position, + sort_remaining=sort_remaining, + ignore_index=ignore_index, + key=key, + ) + return self._create_or_update_from_compiler(new_query_compiler, inplace) + + def sort_values( + self, + by, + axis=0, + ascending=True, + inplace: bool = False, + kind="quicksort", + na_position="last", + ignore_index: bool = False, + key: IndexKeyFunc | None = None, + ): # noqa: PR01, RT01, D200 + """ + Sort by the values along either axis. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + axis = self._get_axis_number(axis) + inplace = validate_bool_kwarg(inplace, "inplace") + ascending = validate_ascending(ascending) + if axis == 0: + # If any column is None raise KeyError (same a native pandas). + if by is None or (isinstance(by, list) and None in by): + # Same error message as native pandas. + raise KeyError(None) + if not isinstance(by, list): + by = [by] + + # Convert 'ascending' to sequence if needed. + if not isinstance(ascending, Sequence): + ascending = [ascending] * len(by) + if len(by) != len(ascending): + # Same error message as native pandas. 
+ raise ValueError( + f"Length of ascending ({len(ascending)})" + f" != length of by ({len(by)})" + ) + + columns = self._query_compiler.columns.values.tolist() + index_names = self._query_compiler.get_index_names() + for by_col in by: + col_count = columns.count(by_col) + index_count = index_names.count(by_col) + if col_count == 0 and index_count == 0: + # Same error message as native pandas. + raise KeyError(by_col) + if col_count and index_count: + # Same error message as native pandas. + raise ValueError( + f"'{by_col}' is both an index level and a column label, which is ambiguous." + ) + if col_count > 1: + # Same error message as native pandas. + raise ValueError(f"The column label '{by_col}' is not unique.") + + if na_position not in get_args(NaPosition): + # Same error message as native pandas for invalid 'na_position' value. + raise ValueError(f"invalid na_position: {na_position}") + result = self._query_compiler.sort_rows_by_column_values( + by, + ascending=ascending, + kind=kind, + na_position=na_position, + ignore_index=ignore_index, + key=key, + ) + else: + result = self._query_compiler.sort_columns_by_row_values( + by, + ascending=ascending, + kind=kind, + na_position=na_position, + ignore_index=ignore_index, + key=key, + ) + return self._create_or_update_from_compiler(result, inplace) + + def std( + self, + axis: Axis | None = None, + skipna: bool = True, + ddof: int = 1, + numeric_only: bool = False, + **kwargs, + ): + """ + Return sample standard deviation over requested axis. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + kwargs.update({"ddof": ddof}) + return self._agg_helper( + func="std", + axis=axis, + skipna=skipna, + numeric_only=numeric_only, + **kwargs, + ) + + def sum( + self, + axis: Axis | None = None, + skipna: bool = True, + numeric_only: bool = False, + min_count: int = 0, + **kwargs: Any, + ): + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + min_count = validate_int_kwarg(min_count, "min_count") + kwargs.update({"min_count": min_count}) + return self._agg_helper( + func="sum", + axis=axis, + skipna=skipna, + numeric_only=numeric_only, + **kwargs, + ) + + def sub( + self, other, axis="columns", level=None, fill_value=None + ): # noqa: PR01, RT01, D200 + """ + Get subtraction of `BasePandasDataset` and `other`, element-wise (binary operator `sub`). + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self._binary_op( + "sub", other, axis=axis, level=level, fill_value=fill_value + ) + + subtract = sub + + def swapaxes(self, axis1, axis2, copy=True): # noqa: PR01, RT01, D200 + """ + Interchange axes and swap values axes appropriately. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + ErrorMessage.not_implemented() + axis1 = self._get_axis_number(axis1) + axis2 = self._get_axis_number(axis2) + if axis1 != axis2: + return self.transpose() + if copy: + return self.copy() + return self + + def swaplevel(self, i=-2, j=-1, axis=0): # noqa: PR01, RT01, D200 + """ + Swap levels `i` and `j` in a `MultiIndex`. 
+ """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + ErrorMessage.not_implemented() + axis = self._get_axis_number(axis) + idx = self.index if axis == 0 else self.columns + return self.set_axis(idx.swaplevel(i, j), axis=axis) + + def tail(self, n: int = 5): + if n == 0: + return self.iloc[0:0] + return self.iloc[-n:] + + def take( + self, + indices: list | AnyArrayLike | slice, + axis: Axis = 0, + **kwargs, + ): + """ + Return the elements in the given *positional* indices along an axis. + """ + axis = self._get_axis_number(axis) + slice_obj = indices if axis == 0 else (slice(None), indices) + return self.iloc[slice_obj] + + def to_clipboard( + self, excel=True, sep=None, **kwargs + ): # pragma: no cover # noqa: PR01, RT01, D200 + """ + Copy object to the system clipboard. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + ErrorMessage.not_implemented() + return self._default_to_pandas("to_clipboard", excel=excel, sep=sep, **kwargs) + + def to_csv( + self, + path_or_buf=None, + sep=",", + na_rep="", + float_format=None, + columns=None, + header=True, + index=True, + index_label=None, + mode="w", + encoding=None, + compression="infer", + quoting=None, + quotechar='"', + lineterminator=None, + chunksize=None, + date_format=None, + doublequote=True, + escapechar=None, + decimal=".", + errors: str = "strict", + storage_options: StorageOptions = None, + ): # pragma: no cover + ErrorMessage.not_implemented() + from snowflake.snowpark.modin.pandas.core.execution.dispatching.factories.dispatcher import ( + FactoryDispatcher, + ) + + return FactoryDispatcher.to_csv( + self._query_compiler, + path_or_buf=path_or_buf, + sep=sep, + na_rep=na_rep, + float_format=float_format, + columns=columns, + header=header, + index=index, + index_label=index_label, + mode=mode, + encoding=encoding, + compression=compression, + quoting=quoting, + quotechar=quotechar, + lineterminator=lineterminator, + chunksize=chunksize, + date_format=date_format, + doublequote=doublequote, + escapechar=escapechar, + decimal=decimal, + errors=errors, + storage_options=storage_options, + ) + + def to_excel( + self, + excel_writer, + sheet_name="Sheet1", + na_rep="", + float_format=None, + columns=None, + header=True, + index=True, + index_label=None, + startrow=0, + startcol=0, + engine=None, + merge_cells=True, + encoding=no_default, + inf_rep="inf", + verbose=no_default, + freeze_panes=None, + storage_options: StorageOptions = None, + ): # pragma: no cover # noqa: PR01, RT01, D200 + """ + Write object to an Excel sheet. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + ErrorMessage.not_implemented() + return self._default_to_pandas( + "to_excel", + excel_writer, + sheet_name=sheet_name, + na_rep=na_rep, + float_format=float_format, + columns=columns, + header=header, + index=index, + index_label=index_label, + startrow=startrow, + startcol=startcol, + engine=engine, + merge_cells=merge_cells, + inf_rep=inf_rep, + freeze_panes=freeze_panes, + storage_options=storage_options, + ) + + def to_hdf( + self, path_or_buf, key, format="table", **kwargs + ): # pragma: no cover # noqa: PR01, RT01, D200 + """ + Write the contained data to an HDF5 file using HDFStore. 
+ """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + ErrorMessage.not_implemented() + return self._default_to_pandas( + "to_hdf", path_or_buf, key, format=format, **kwargs + ) + + def to_json( + self, + path_or_buf=None, + orient=None, + date_format=None, + double_precision=10, + force_ascii=True, + date_unit="ms", + default_handler=None, + lines=False, + compression="infer", + index=True, + indent=None, + storage_options: StorageOptions = None, + ): # pragma: no cover # noqa: PR01, RT01, D200 + """ + Convert the object to a JSON string. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + ErrorMessage.not_implemented() + return self._default_to_pandas( + "to_json", + path_or_buf, + orient=orient, + date_format=date_format, + double_precision=double_precision, + force_ascii=force_ascii, + date_unit=date_unit, + default_handler=default_handler, + lines=lines, + compression=compression, + index=index, + indent=indent, + storage_options=storage_options, + ) + + def to_latex( + self, + buf=None, + columns=None, + col_space=None, + header=True, + index=True, + na_rep="NaN", + formatters=None, + float_format=None, + sparsify=None, + index_names=True, + bold_rows=False, + column_format=None, + longtable=None, + escape=None, + encoding=None, + decimal=".", + multicolumn=None, + multicolumn_format=None, + multirow=None, + caption=None, + label=None, + position=None, + ): # pragma: no cover # noqa: PR01, RT01, D200 + """ + Render object to a LaTeX tabular, longtable, or nested table. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + ErrorMessage.not_implemented() + return self._default_to_pandas( + "to_latex", + buf=buf, + columns=columns, + col_space=col_space, + header=header, + index=index, + na_rep=na_rep, + formatters=formatters, + float_format=float_format, + sparsify=sparsify, + index_names=index_names, + bold_rows=bold_rows, + column_format=column_format, + longtable=longtable, + escape=escape, + encoding=encoding, + decimal=decimal, + multicolumn=multicolumn, + multicolumn_format=multicolumn_format, + multirow=multirow, + caption=caption, + label=label, + position=position, + ) + + def to_markdown( + self, + buf=None, + mode: str = "wt", + index: bool = True, + storage_options: StorageOptions = None, + **kwargs, + ): # noqa: PR01, RT01, D200 + """ + Print `BasePandasDataset` in Markdown-friendly format. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + ErrorMessage.not_implemented() + return self._default_to_pandas( + "to_markdown", + buf=buf, + mode=mode, + index=index, + storage_options=storage_options, + **kwargs, + ) + + def to_pickle( + self, + path, + compression: CompressionOptions = "infer", + protocol: int = pkl.HIGHEST_PROTOCOL, + storage_options: StorageOptions = None, + ): # pragma: no cover # noqa: PR01, D200 + """ + Pickle (serialize) object to file. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + ErrorMessage.not_implemented() + from snowflake.snowpark.modin.pandas import to_pickle + + to_pickle( + self, + path, + compression=compression, + protocol=protocol, + storage_options=storage_options, + ) + + def to_numpy( + self, + dtype: npt.DTypeLike | None = None, + copy: bool = False, + na_value: object = no_default, + **kwargs: Any, + ) -> np.ndarray: + """ + Convert the `BasePandasDataset` to a NumPy array or a Modin wrapper for NumPy array. 
+ """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + if copy: + WarningMessage.ignored_argument( + operation="to_numpy", + argument="copy", + message="copy is ignored in Snowflake backend", + ) + return self._query_compiler.to_numpy( + dtype=dtype, + na_value=na_value, + **kwargs, + ) + + # TODO(williamma12): When this gets implemented, have the series one call this. + def to_period( + self, freq=None, axis=0, copy=True + ): # pragma: no cover # noqa: PR01, RT01, D200 + """ + Convert `BasePandasDataset` from DatetimeIndex to PeriodIndex. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self._default_to_pandas("to_period", freq=freq, axis=axis, copy=copy) + + def to_string( + self, + buf=None, + columns=None, + col_space=None, + header=True, + index=True, + na_rep="NaN", + formatters=None, + float_format=None, + sparsify=None, + index_names=True, + justify=None, + max_rows=None, + min_rows=None, + max_cols=None, + show_dimensions=False, + decimal=".", + line_width=None, + max_colwidth=None, + encoding=None, + ): # noqa: PR01, RT01, D200 + """ + Render a `BasePandasDataset` to a console-friendly tabular output. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + ErrorMessage.not_implemented() + return self._default_to_pandas( + "to_string", + buf=buf, + columns=columns, + col_space=col_space, + header=header, + index=index, + na_rep=na_rep, + formatters=formatters, + float_format=float_format, + sparsify=sparsify, + index_names=index_names, + justify=justify, + max_rows=max_rows, + max_cols=max_cols, + show_dimensions=show_dimensions, + decimal=decimal, + line_width=line_width, + max_colwidth=max_colwidth, + encoding=encoding, + ) + + def to_sql( + self, + name, + con, + schema=None, + if_exists="fail", + index=True, + index_label=None, + chunksize=None, + dtype=None, + method=None, + ): # noqa: PR01, D200 + """ + Write records stored in a `BasePandasDataset` to a SQL database. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + ErrorMessage.not_implemented() + new_query_compiler = self._query_compiler + # writing the index to the database by inserting it to the DF + if index: + if not index_label: + index_label = "index" + new_query_compiler = new_query_compiler.insert(0, index_label, self.index) + # so pandas._to_sql will not write the index to the database as well + index = False + + from modin.core.execution.dispatching.factories.dispatcher import ( + FactoryDispatcher, + ) + + FactoryDispatcher.to_sql( + new_query_compiler, + name=name, + con=con, + schema=schema, + if_exists=if_exists, + index=index, + index_label=index_label, + chunksize=chunksize, + dtype=dtype, + method=method, + ) + + # TODO(williamma12): When this gets implemented, have the series one call this. + def to_timestamp( + self, freq=None, how="start", axis=0, copy=True + ): # noqa: PR01, RT01, D200 + """ + Cast to DatetimeIndex of timestamps, at *beginning* of period. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + ErrorMessage.not_implemented() # pragma: no cover + + return self._default_to_pandas( + "to_timestamp", freq=freq, how=how, axis=axis, copy=copy + ) + + def to_xarray(self): # noqa: PR01, RT01, D200 + """ + Return an xarray object from the `BasePandasDataset`. 
+ """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + ErrorMessage.not_implemented() + return self._default_to_pandas("to_xarray") + + def truediv( + self, other, axis="columns", level=None, fill_value=None + ): # noqa: PR01, RT01, D200 + """ + Get floating division of `BasePandasDataset` and `other`, element-wise (binary operator `truediv`). + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self._binary_op( + "truediv", other, axis=axis, level=level, fill_value=fill_value + ) + + div = divide = truediv + + def truncate( + self, before=None, after=None, axis=None, copy=True + ): # noqa: PR01, RT01, D200 + """ + Truncate a `BasePandasDataset` before and after some index value. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + ErrorMessage.not_implemented() + axis = self._get_axis_number(axis) + if ( + not self.axes[axis].is_monotonic_increasing + and not self.axes[axis].is_monotonic_decreasing + ): + raise ValueError("truncate requires a sorted index") + s = slice(*self.axes[axis].slice_locs(before, after)) + slice_obj = s if axis == 0 else (slice(None), s) + return self.iloc[slice_obj] + + def transform(self, func, axis=0, *args, **kwargs): # noqa: PR01, RT01, D200 + """ + Call ``func`` on self producing a `BasePandasDataset` with the same axis shape as self. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + ErrorMessage.not_implemented() + kwargs["is_transform"] = True + self._validate_function(func) + try: + result = self.agg(func, axis=axis, *args, **kwargs) + except TypeError: + raise + except Exception as err: + raise ValueError("Transform function failed") from err + try: + assert len(result) == len(self) + except Exception: + raise ValueError("transforms cannot produce aggregated results") + return result + + def tz_convert(self, tz, axis=0, level=None, copy=True): # noqa: PR01, RT01, D200 + """ + Convert tz-aware axis to target time zone. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + ErrorMessage.not_implemented() + axis = self._get_axis_number(axis) + if level is not None: + new_labels = ( + pandas.Series(index=self.axes[axis]).tz_convert(tz, level=level).index + ) + else: + new_labels = self.axes[axis].tz_convert(tz) + obj = self.copy() if copy else self + return obj.set_axis(new_labels, axis, copy=copy) + + def tz_localize( + self, tz, axis=0, level=None, copy=True, ambiguous="raise", nonexistent="raise" + ): # noqa: PR01, RT01, D200 + """ + Localize tz-naive index of a `BasePandasDataset` to target time zone. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + ErrorMessage.not_implemented() + axis = self._get_axis_number(axis) + new_labels = ( + pandas.Series(index=self.axes[axis]) + .tz_localize( + tz, + axis=axis, + level=level, + copy=False, + ambiguous=ambiguous, + nonexistent=nonexistent, + ) + .index + ) + return self.set_axis(new_labels, axis, copy=copy) + + def var( + self, + axis: Axis | None = None, + skipna: bool = True, + ddof: int = 1, + numeric_only: bool = False, + **kwargs: Any, + ): + """ + Return unbiased variance over requested axis. + """ + kwargs.update({"ddof": ddof}) + return self._agg_helper( + func="var", + axis=axis, + skipna=skipna, + numeric_only=numeric_only, + **kwargs, + ) + + def __abs__(self): + """ + Return a `BasePandasDataset` with absolute numeric value of each element. 
+ + Returns + ------- + BasePandasDataset + Object containing the absolute value of each element. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self.abs() + + @_doc_binary_op( + operation="union", bin_op="and", right="other", **_doc_binary_op_kwargs + ) + def __and__(self, other): + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self._binary_op("__and__", other, axis=0) + + @_doc_binary_op( + operation="union", bin_op="rand", right="other", **_doc_binary_op_kwargs + ) + def __rand__(self, other): + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self._binary_op("__rand__", other, axis=0) + + def __array__(self, dtype=None): + """ + Return the values as a NumPy array. + + Parameters + ---------- + dtype : str or np.dtype, optional + The dtype of returned array. + + Returns + ------- + arr : np.ndarray + NumPy representation of Modin object. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + arr = self.to_numpy(dtype) + return arr + + def __array_wrap__(self, result, context=None): + """ + Get called after a ufunc and other functions. + + Parameters + ---------- + result : np.ndarray + The result of the ufunc or other function called on the NumPy array + returned by __array__. + context : tuple of (func, tuple, int), optional + This parameter is returned by ufuncs as a 3-element tuple: (name of the + ufunc, arguments of the ufunc, domain of the ufunc), but is not set by + other NumPy functions. + + Returns + ------- + BasePandasDataset + Wrapped Modin object. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + ErrorMessage.not_implemented() # pragma: no cover + + # TODO: This is very inefficient. __array__ and as_matrix have been + # changed to call the more efficient to_numpy, but this has been left + # unchanged since we are not sure of its purpose. + return self._default_to_pandas("__array_wrap__", result, context=context) + + def __copy__(self, deep=True): + """ + Return the copy of the `BasePandasDataset`. + + Parameters + ---------- + deep : bool, default: True + Whether the copy should be deep or not. + + Returns + ------- + BasePandasDataset + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self.copy(deep=deep) + + def __deepcopy__(self, memo=None): + """ + Return the deep copy of the `BasePandasDataset`. + + Parameters + ---------- + memo : Any, optional + Deprecated parameter. + + Returns + ------- + BasePandasDataset + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self.copy(deep=True) + + @_doc_binary_op( + operation="equality comparison", + bin_op="eq", + right="other", + **_doc_binary_op_kwargs, + ) + def __eq__(self, other): + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self.eq(other) + + def __finalize__(self, other, method=None, **kwargs): + """ + Propagate metadata from `other` to `self`. + + Parameters + ---------- + other : BasePandasDataset + The object from which to get the attributes that we are going + to propagate. + method : str, optional + A passed method name providing context on where `__finalize__` + was called. + **kwargs : dict + Additional keywords arguments to be passed to `__finalize__`. 
+ + Returns + ------- + BasePandasDataset + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + ErrorMessage.not_implemented() # pragma: no cover + + return self._default_to_pandas("__finalize__", other, method=method, **kwargs) + + @_doc_binary_op( + operation="greater than or equal comparison", + bin_op="ge", + right="right", + **_doc_binary_op_kwargs, + ) + def __ge__(self, right): + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self.ge(right) + + def __getitem__(self, key): + """ + Retrieve dataset according to `key`. + + Parameters + ---------- + key : callable, scalar, slice, str or tuple + The global row index to retrieve data from. + + Returns + ------- + BasePandasDataset + Located dataset. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + key = apply_if_callable(key, self) + # If a slice is passed in, use .iloc[key]. + if isinstance(key, slice): + if (is_integer(key.start) or key.start is None) and ( + is_integer(key.stop) or key.stop is None + ): + return self.iloc[key] + else: + return self.loc[key] + + # If the object calling getitem is a Series, only use .loc[key] to filter index. + if isinstance(self, pd.Series): + return self.loc[key] + + # If the object is a boolean list-like object, use .loc[key] to filter index. + # The if statement is structured this way to avoid calling dtype and reduce query count. + if isinstance(key, pd.Series): + if key.dtype == bool: + return self.loc[key] + elif is_list_like(key): + if hasattr(key, "dtype"): + if key.dtype == bool: + return self.loc[key] + if (all(is_bool(k) for k in key)) and len(key) > 0: + return self.loc[key] + + # In all other cases, use .loc[:, key] to filter columns. + return self.loc[:, key] + + __hash__ = None + + @_doc_binary_op( + operation="greater than comparison", + bin_op="gt", + right="right", + **_doc_binary_op_kwargs, + ) + def __gt__(self, right): + return self.gt(right) + + def __invert__(self): + """ + Apply bitwise inverse to each element of the `BasePandasDataset`. + + Returns + ------- + BasePandasDataset + New BasePandasDataset containing bitwise inverse to each value. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self.__constructor__(query_compiler=self._query_compiler.invert()) + + @_doc_binary_op( + operation="less than or equal comparison", + bin_op="le", + right="right", + **_doc_binary_op_kwargs, + ) + def __le__(self, right): + return self.le(right) + + def __len__(self) -> int: + """ + Return length of info axis. + + Returns + ------- + int + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self._query_compiler.get_axis_len(axis=0) + + @_doc_binary_op( + operation="less than comparison", + bin_op="lt", + right="right", + **_doc_binary_op_kwargs, + ) + def __lt__(self, right): + return self.lt(right) + + def __matmul__(self, other): + """ + Compute the matrix multiplication between the `BasePandasDataset` and `other`. + + Parameters + ---------- + other : BasePandasDataset or array-like + The other object to compute the matrix product with. 
+ + Returns + ------- + BasePandasDataset, np.ndarray or scalar + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self.dot(other) + + @_doc_binary_op( + operation="not equal comparison", + bin_op="ne", + right="other", + **_doc_binary_op_kwargs, + ) + def __ne__(self, other): + return self.ne(other) + + def __neg__(self): + """ + Change the sign for every value of self. + + Returns + ------- + BasePandasDataset + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self.__constructor__( + query_compiler=self._query_compiler.unary_op("__neg__") + ) + + def __nonzero__(self): + """ + Evaluate `BasePandasDataset` as boolean object. + + Raises + ------ + ValueError + Always since truth value for self is ambiguous. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + raise ValueError( + f"The truth value of a {self.__class__.__name__} is ambiguous. " + + "Use a.empty, a.bool(), a.item(), a.any() or a.all()." + ) + + __bool__ = __nonzero__ + + @_doc_binary_op( + operation="disjunction", + bin_op="or", + right="other", + **_doc_binary_op_kwargs, + ) + def __or__(self, other): + return self._binary_op("__or__", other, axis=0) + + @_doc_binary_op( + operation="disjunction", + bin_op="ror", + right="other", + **_doc_binary_op_kwargs, + ) + def __ror__(self, other): + return self._binary_op("__ror__", other, axis=0) + + def __sizeof__(self): + """ + Generate the total memory usage for an `BasePandasDataset`. + + Returns + ------- + int + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + ErrorMessage.not_implemented() # pragma: no cover + + return self._default_to_pandas("__sizeof__") + + def __str__(self): # pragma: no cover + """ + Return str(self). + + Returns + ------- + str + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return repr(self) + + @_doc_binary_op( + operation="exclusive disjunction", + bin_op="xor", + right="other", + **_doc_binary_op_kwargs, + ) + def __xor__(self, other): + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self._binary_op("__xor__", other, axis=0) + + @_doc_binary_op( + operation="exclusive disjunction", + bin_op="rxor", + right="other", + **_doc_binary_op_kwargs, + ) + def __rxor__(self, other): + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self._binary_op("__rxor__", other, axis=0) + + @property + def size(self) -> int: + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return np.prod(self.shape) # type: ignore[return-value] + + @property + def values(self) -> np.ndarray: + """ + Return a NumPy representation of the `BasePandasDataset`. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self.to_numpy() + + def _repartition(self, axis: int | None = None): + """ + Repartitioning Modin objects to get ideal partitions inside. + + Allows to improve performance where the query compiler can't improve + yet by doing implicit repartitioning. + + Parameters + ---------- + axis : {0, 1, None}, optional + The axis along which the repartitioning occurs. + `None` is used for repartitioning along both axes. + + Returns + ------- + DataFrame or Series + The repartitioned dataframe or series, depending on the original type. 
+ """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + allowed_axis_values = (0, 1, None) + if axis not in allowed_axis_values: + raise ValueError( + f"Passed `axis` parameter: {axis}, but should be one of {allowed_axis_values}" + ) + return self.__constructor__( + query_compiler=self._query_compiler.repartition(axis=axis) + ) + + def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): + """ + Apply the `ufunc` to the `BasePandasDataset`. + + Parameters + ---------- + ufunc : np.ufunc + The NumPy ufunc to apply. + method : str + The method to apply. + *inputs : tuple + The inputs to the ufunc. + **kwargs : dict + Additional keyword arguments. + + Returns + ------- + BasePandasDataset + The result of the ufunc applied to the `BasePandasDataset`. + """ + # Use pandas version of ufunc if it exists + if method != "__call__": + # Return sentinel value NotImplemented + return NotImplemented + from snowflake.snowpark.modin.plugin.utils.numpy_to_pandas import ( + numpy_to_pandas_universal_func_map, + ) + + if ufunc.__name__ in numpy_to_pandas_universal_func_map: + ufunc = numpy_to_pandas_universal_func_map[ufunc.__name__] + return ufunc(self, inputs[1:], kwargs) + # return the sentinel NotImplemented if we do not support this function + return NotImplemented + + def __array_function__( + self, func: callable, types: tuple, args: tuple, kwargs: dict + ): + """ + Apply the `func` to the `BasePandasDataset`. + + Parameters + ---------- + func : np.func + The NumPy func to apply. + types : tuple + The types of the args. + args : tuple + The args to the func. + kwargs : dict + Additional keyword arguments. + + Returns + ------- + BasePandasDataset + The result of the ufunc applied to the `BasePandasDataset`. + """ + from snowflake.snowpark.modin.plugin.utils.numpy_to_pandas import ( + numpy_to_pandas_func_map, + ) + + if func.__name__ in numpy_to_pandas_func_map: + return numpy_to_pandas_func_map[func.__name__](*args, **kwargs) + else: + # per NEP18 we raise NotImplementedError so that numpy can intercept + return NotImplemented # pragma: no cover diff --git a/src/snowflake/snowpark/modin/pandas/dataframe.py b/src/snowflake/snowpark/modin/pandas/dataframe.py new file mode 100644 index 00000000000..a84748d7e8a --- /dev/null +++ b/src/snowflake/snowpark/modin/pandas/dataframe.py @@ -0,0 +1,3445 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +# Code in this file may constitute partial or total reimplementation, or modification of +# existing code originally distributed by the Modin project, under the Apache License, +# Version 2.0. 
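+# Illustrative usage sketch for the ``DataFrame`` class defined in this module.
+# This is a minimal, hedged example rather than a reference: the import paths are
+# the ones introduced by this change, while ``connection_parameters`` and the exact
+# dispatch of NumPy functions are assumptions that depend on the caller's
+# environment and on the mappings consulted by ``BasePandasDataset.__array_ufunc__``.
+#
+#     import numpy as np
+#     import snowflake.snowpark.modin.plugin  # registers the Snowflake execution backend
+#     import snowflake.snowpark.modin.pandas as pd
+#     from snowflake.snowpark import Session
+#
+#     Session.builder.configs(connection_parameters).create()
+#     df = pd.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})
+#     df["a"].sum()       # computation is pushed down to Snowflake; returns a scalar
+#     np.abs(df[["a"]])   # NumPy ufuncs may dispatch via BasePandasDataset.__array_ufunc__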
+ +"""Module houses ``DataFrame`` class, that is distributed version of ``pandas.DataFrame``.""" + +from __future__ import annotations + +import collections +import datetime +import functools +import itertools +import re +import sys +import warnings +from collections.abc import Hashable, Iterable, Iterator, Mapping, Sequence +from logging import getLogger +from typing import IO, Any, Callable, Literal + +import numpy as np +import pandas +from pandas._libs.lib import NoDefault, no_default +from pandas._typing import ( + AggFuncType, + AnyArrayLike, + Axes, + Axis, + CompressionOptions, + FilePath, + FillnaOptions, + IgnoreRaise, + IndexLabel, + Level, + PythonFuncType, + Renamer, + Scalar, + StorageOptions, + Suffixes, + WriteBuffer, +) +from pandas.core.common import apply_if_callable, is_bool_indexer +from pandas.core.dtypes.common import ( + infer_dtype_from_object, + is_bool_dtype, + is_dict_like, + is_list_like, + is_numeric_dtype, +) +from pandas.core.dtypes.inference import is_hashable, is_integer +from pandas.core.indexes.frozen import FrozenList +from pandas.io.formats.printing import pprint_thing +from pandas.util._validators import validate_bool_kwarg + +from snowflake.snowpark.modin import pandas as pd +from snowflake.snowpark.modin.pandas.accessor import CachedAccessor, SparseFrameAccessor +from snowflake.snowpark.modin.pandas.base import _ATTRS_NO_LOOKUP, BasePandasDataset +from snowflake.snowpark.modin.pandas.groupby import ( + DataFrameGroupBy, + validate_groupby_args, +) + +# from . import _update_engine +from snowflake.snowpark.modin.pandas.iterator import PartitionIterator +from snowflake.snowpark.modin.pandas.series import Series +from snowflake.snowpark.modin.pandas.snow_partition_iterator import ( + SnowparkPandasRowPartitionIterator, +) +from snowflake.snowpark.modin.pandas.utils import ( + create_empty_native_pandas_frame, + from_non_pandas, + from_pandas, + is_scalar, + raise_if_native_pandas_objects, + replace_external_data_keys_with_empty_pandas_series, + replace_external_data_keys_with_query_compiler, +) +from snowflake.snowpark.modin.plugin._internal.aggregation_utils import ( + is_snowflake_agg_func, +) +from snowflake.snowpark.modin.plugin._internal.utils import is_repr_truncated +from snowflake.snowpark.modin.plugin._typing import DropKeep, ListLike +from snowflake.snowpark.modin.plugin.utils.error_message import ErrorMessage +from snowflake.snowpark.modin.plugin.utils.warning_message import ( + SET_DATAFRAME_ATTRIBUTE_WARNING, + WarningMessage, +) +from snowflake.snowpark.modin.utils import _inherit_docstrings, hashable, to_pandas +from snowflake.snowpark.udf import UserDefinedFunction + +logger = getLogger(__name__) + +DF_SETITEM_LIST_LIKE_KEY_AND_RANGE_LIKE_VALUE = ( + "Currently do not support Series or list-like keys with range-like values" +) + +DF_SETITEM_SLICE_AS_SCALAR_VALUE = ( + "Currently do not support assigning a slice value as if it's a scalar value" +) + +DF_ITERROWS_ITERTUPLES_WARNING_MESSAGE = ( + "{} will result eager evaluation and potential data pulling, which is inefficient. For efficient Snowpark " + "pandas usage, consider rewriting the code with an operator (such as DataFrame.apply or DataFrame.applymap) which " + "can work on the entire DataFrame in one shot." 
+) + +# Dictionary of extensions assigned to this class +_DATAFRAME_EXTENSIONS_ = {} + + +@_inherit_docstrings( + pandas.DataFrame, + excluded=[ + pandas.DataFrame.flags, + pandas.DataFrame.cov, + pandas.DataFrame.merge, + pandas.DataFrame.reindex, + pandas.DataFrame.to_parquet, + pandas.DataFrame.fillna, + ], + apilink="pandas.DataFrame", +) +class DataFrame(BasePandasDataset): + _pandas_class = pandas.DataFrame + + def __init__( + self, + data=None, + index=None, + columns=None, + dtype=None, + copy=None, + query_compiler=None, + ) -> None: + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + # Siblings are other dataframes that share the same query compiler. We + # use this list to update inplace when there is a shallow copy. + self._siblings = [] + + # Engine.subscribe(_update_engine) + if isinstance(data, (DataFrame, Series)): + self._query_compiler = data._query_compiler.copy() + if index is not None and any(i not in data.index for i in index): + ErrorMessage.not_implemented( + "Passing non-existant columns or index values to constructor not" + + " yet implemented." + ) # pragma: no cover + if isinstance(data, Series): + # We set the column name if it is not in the provided Series + if data.name is None: + self.columns = [0] if columns is None else columns + # If the columns provided are not in the named Series, pandas clears + # the DataFrame and sets columns to the columns provided. + elif columns is not None and data.name not in columns: + self._query_compiler = from_pandas( + self.__constructor__(columns=columns) + )._query_compiler + if index is not None: + self._query_compiler = data.loc[index]._query_compiler + elif columns is None and index is None: + data._add_sibling(self) + else: + if columns is not None and any(i not in data.columns for i in columns): + ErrorMessage.not_implemented( + "Passing non-existant columns or index values to constructor not" + + " yet implemented." 
+ ) # pragma: no cover + if index is None: + index = slice(None) + if columns is None: + columns = slice(None) + self._query_compiler = data.loc[index, columns]._query_compiler + + # Check type of data and use appropriate constructor + elif query_compiler is None: + distributed_frame = from_non_pandas(data, index, columns, dtype) + if distributed_frame is not None: + self._query_compiler = distributed_frame._query_compiler + return + + if isinstance(data, pandas.Index): + pass + elif is_list_like(data) and not is_dict_like(data): + old_dtype = getattr(data, "dtype", None) + values = [ + obj._to_pandas() if isinstance(obj, Series) else obj for obj in data + ] + if isinstance(data, np.ndarray): + data = np.array(values, dtype=old_dtype) + else: + try: + data = type(data)(values, dtype=old_dtype) + except TypeError: + data = values + elif is_dict_like(data) and not isinstance( + data, (pandas.Series, Series, pandas.DataFrame, DataFrame) + ): + if columns is not None: + data = {key: value for key, value in data.items() if key in columns} + + if len(data) and all(isinstance(v, Series) for v in data.values()): + from .general import concat + + new_qc = concat( + data.values(), axis=1, keys=data.keys() + )._query_compiler + + if dtype is not None: + new_qc = new_qc.astype({col: dtype for col in new_qc.columns}) + if index is not None: + new_qc = new_qc.reindex(axis=0, labels=index) + if columns is not None: + new_qc = new_qc.reindex(axis=1, labels=columns) + + self._query_compiler = new_qc + return + + data = { + k: v._to_pandas() if isinstance(v, Series) else v + for k, v in data.items() + } + pandas_df = pandas.DataFrame( + data=data, index=index, columns=columns, dtype=dtype, copy=copy + ) + self._query_compiler = from_pandas(pandas_df)._query_compiler + else: + self._query_compiler = query_compiler + + def __repr__(self): + """ + Return a string representation for a particular ``DataFrame``. + + Returns + ------- + str + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + num_rows = pandas.get_option("display.max_rows") or 10 + # see _repr_html_ for comment, allow here also all column behavior + num_cols = pandas.get_option("display.max_columns") + + ( + row_count, + col_count, + repr_df, + ) = self._query_compiler.build_repr_df(num_rows, num_cols, "x") + result = repr(repr_df) + + # if truncated, add shape information + if is_repr_truncated(row_count, col_count, num_rows, num_cols): + # The split here is so that we don't repr pandas row lengths. + return result.rsplit("\n\n", 1)[0] + "\n\n[{} rows x {} columns]".format( + row_count, col_count + ) + else: + return result + + def _repr_html_(self): # pragma: no cover + """ + Return a html representation for a particular ``DataFrame``. + + Returns + ------- + str + + Notes + ----- + Supports pandas `display.max_rows` and `display.max_columns` options. + """ + num_rows = pandas.get_option("display.max_rows") or 60 + # Modin uses here 20 as default, but this does not coincide well with pandas option. Therefore allow + # here value=0 which means display all columns. + num_cols = pandas.get_option("display.max_columns") + + ( + row_count, + col_count, + repr_df, + ) = self._query_compiler.build_repr_df(num_rows, num_cols) + result = repr_df._repr_html_() + + if is_repr_truncated(row_count, col_count, num_rows, num_cols): + # We split so that we insert our correct dataframe dimensions. + return ( + result.split("
<p>")[0] + + f"<p>{row_count} rows × {col_count} columns</p>
\n" + ) + else: + return result + + def _get_columns(self) -> pandas.Index: + """ + Get the columns for this Snowpark pandas ``DataFrame``. + + Returns + ------- + pandas.Index + The all columns. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + return self._query_compiler.columns + + def _set_columns(self, new_columns: Axes) -> None: + """ + Set the columns for this Snowpark pandas ``DataFrame``. + + Parameters + ---------- + new_columns : + The new columns to set. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + self._update_inplace( + new_query_compiler=self._query_compiler.set_columns(new_columns) + ) + + columns = property(_get_columns, _set_columns) + + @property + def ndim(self) -> int: + return 2 + + def drop_duplicates( + self, subset=None, keep="first", inplace=False, ignore_index=False + ): # noqa: PR01, RT01, D200 + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + """ + Return ``DataFrame`` with duplicate rows removed. + """ + return super().drop_duplicates( + subset=subset, keep=keep, inplace=inplace, ignore_index=ignore_index + ) + + def dropna( + self, + *, + axis: Axis = 0, + how: str | NoDefault = no_default, + thresh: int | NoDefault = no_default, + subset: IndexLabel = None, + inplace: bool = False, + ): # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + return super()._dropna( + axis=axis, how=how, thresh=thresh, subset=subset, inplace=inplace + ) + + @property + def dtypes(self): # noqa: RT01, D200 + """ + Return the dtypes in the ``DataFrame``. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + return self._query_compiler.dtypes + + def duplicated( + self, subset: Hashable | Sequence[Hashable] = None, keep: DropKeep = "first" + ): + """ + Return boolean ``Series`` denoting duplicate rows. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + df = self[subset] if subset is not None else self + new_qc = df._query_compiler.duplicated(keep=keep) + duplicates = self._reduce_dimension(new_qc) + # remove Series name which was assigned automatically by .apply in QC + # this is pandas behavior, i.e., if duplicated result is a series, no name is returned + duplicates.name = None + return duplicates + + @property + def empty(self) -> bool: + """ + Indicate whether ``DataFrame`` is empty. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + return len(self.columns) == 0 or len(self) == 0 + + @property + def axes(self): + """ + Return a list representing the axes of the ``DataFrame``. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + return [self.index, self.columns] + + @property + def shape(self) -> tuple[int, int]: + """ + Return a tuple representing the dimensionality of the ``DataFrame``. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + return len(self), len(self.columns) + + def add_prefix(self, prefix): + """ + Prefix labels with string `prefix`. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + # pandas converts non-string prefix values into str and adds it to the column labels. + return self.__constructor__( + query_compiler=self._query_compiler.add_substring( + str(prefix), substring_type="prefix", axis=1 + ) + ) + + def add_suffix(self, suffix): + """ + Suffix labels with string `suffix`. 
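+
+ Examples
+ --------
+ Minimal illustrative sketch (assumes ``pd`` is the Snowpark pandas namespace
+ and an active session; the suffix is applied to column labels for DataFrames):
+
+ >>> df = pd.DataFrame({"A": [1, 2], "B": [3, 4]})
+ >>> df.add_suffix("_col").columns.tolist()  # doctest: +SKIP
+ ['A_col', 'B_col']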
+ """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + # pandas converts non-string suffix values into str and appends it to the column labels. + return self.__constructor__( + query_compiler=self._query_compiler.add_substring( + str(suffix), substring_type="suffix", axis=1 + ) + ) + + def applymap(self, func: PythonFuncType, na_action: str | None = None, **kwargs): + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + if not callable(func): + raise TypeError(f"{func} is not callable") + return self.__constructor__( + query_compiler=self._query_compiler.applymap( + func, na_action=na_action, **kwargs + ) + ) + + def aggregate( + self, func: AggFuncType = None, axis: Axis = 0, *args: Any, **kwargs: Any + ): + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + return super().aggregate(func, axis, *args, **kwargs) + + agg = aggregate + + def apply( + self, + func: AggFuncType | UserDefinedFunction, + axis: Axis = 0, + raw: bool = False, + result_type: Literal["expand", "reduce", "broadcast"] | None = None, + args=(), + **kwargs, + ): + """ + Apply a function along an axis of the ``DataFrame``. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + axis = self._get_axis_number(axis) + query_compiler = self._query_compiler.apply( + func, + axis, + raw=raw, + result_type=result_type, + args=args, + **kwargs, + ) + if not isinstance(query_compiler, type(self._query_compiler)): + # A scalar was returned + return query_compiler + + # If True, it is an unamed series. + # Theoretically, if df.apply returns a Series, it will only be an unnamed series + # because the function is supposed to be series -> scalar. + if query_compiler._modin_frame.is_unnamed_series(): + return Series(query_compiler=query_compiler) + else: + return self.__constructor__(query_compiler=query_compiler) + + def groupby( + self, + by=None, + axis: Axis | NoDefault = no_default, + level: IndexLabel | None = None, + as_index: bool = True, + sort: bool = True, + group_keys: bool = True, + observed: bool | NoDefault = no_default, + dropna: bool = True, + ): + """ + Group ``DataFrame`` using a mapper or by a ``Series`` of columns. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + if axis is not no_default: + axis = self._get_axis_number(axis) + if axis == 1: + warnings.warn( + "DataFrame.groupby with axis=1 is deprecated. Do " + + "`frame.T.groupby(...)` without axis instead.", + FutureWarning, + stacklevel=1, + ) + else: + warnings.warn( + "The 'axis' keyword in DataFrame.groupby is deprecated and " + + "will be removed in a future version.", + FutureWarning, + stacklevel=1, + ) + else: + axis = 0 + + validate_groupby_args(by, level, observed) + + axis = self._get_axis_number(axis) + + if axis != 0 and as_index is False: + raise ValueError("as_index=False only valid for axis=0") + + idx_name = None + + if ( + not isinstance(by, Series) + and is_list_like(by) + and len(by) == 1 + # if by is a list-like of (None,), we have to keep it as a list because + # None may be referencing a column or index level whose label is + # `None`, and by=None wold mean that there is no `by` param. 
+ and by[0] is not None + ): + by = by[0] + + if hashable(by) and ( + not callable(by) and not isinstance(by, (pandas.Grouper, FrozenList)) + ): + idx_name = by + elif isinstance(by, Series): + idx_name = by.name + if by._parent is self: + # if the SnowSeries comes from the current dataframe, + # convert it to labels directly for easy processing + by = by.name + elif is_list_like(by): + if axis == 0 and all( + ( + (hashable(o) and (o in self)) + or isinstance(o, Series) + or (is_list_like(o) and len(o) == len(self.shape[axis])) + ) + for o in by + ): + # plit 'by's into those that belongs to the self (internal_by) + # and those that doesn't (external_by). For SnowSeries that belongs + # to current DataFrame, we convert it to labels for easy process. + internal_by, external_by = [], [] + + for current_by in by: + if hashable(current_by): + internal_by.append(current_by) + elif isinstance(current_by, Series): + if current_by._parent is self: + internal_by.append(current_by.name) + else: + external_by.append(current_by) # pragma: no cover + else: + external_by.append(current_by) + + by = internal_by + external_by + + return DataFrameGroupBy( + self, + by, + axis, + level, + as_index, + sort, + group_keys, + idx_name, + observed=observed, + dropna=dropna, + ) + + def keys(self): # noqa: RT01, D200 + """ + Get columns of the ``DataFrame``. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + return self.columns + + def transform( + self, func: PythonFuncType, axis: Axis = 0, *args: Any, **kwargs: Any + ) -> DataFrame: # noqa: PR01, RT01, D200 + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + if is_list_like(func) or is_dict_like(func): + ErrorMessage.not_implemented( + "dict and list parameters are not supported for transform" + ) + # throw the same error as pandas for cases where the function type is + # invalid. + if not isinstance(func, str) and not callable(func): + raise TypeError(f"{type(func)} object is not callable") + + # if the function is an aggregation function, we'll produce + # some bogus results while pandas will throw the error the + # code below is throwing. So we do the same. + if is_snowflake_agg_func(func): + raise ValueError("Function did not transform") + + return self.apply(func, axis, False, args=args, **kwargs) + + def transpose(self, copy=False, *args): # noqa: PR01, RT01, D200 + """ + Transpose index and columns. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + if copy: + WarningMessage.ignored_argument( + operation="transpose", + argument="copy", + message="Transpose ignore copy argument in Snowpark pandas API", + ) + + if args: + WarningMessage.ignored_argument( + operation="transpose", + argument="args", + message="Transpose ignores args in Snowpark pandas API", + ) + + return self.__constructor__(query_compiler=self._query_compiler.transpose()) + + T = property(transpose) + + def add( + self, other, axis="columns", level=None, fill_value=None + ): # noqa: PR01, RT01, D200 + """ + Get addition of ``DataFrame`` and `other`, element-wise (binary operator `add`). + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + return self._binary_op( + "add", + other, + axis=axis, + level=level, + fill_value=fill_value, + ) + + def assign(self, **kwargs): # noqa: PR01, RT01, D200 + """ + Assign new columns to a ``DataFrame``. 
+ """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + ErrorMessage.not_implemented() # pragma: no cover + + df = self.copy() + for k, v in kwargs.items(): + if callable(v): + df[k] = v(df) + else: + df[k] = v + return df + + def boxplot( + self, + column=None, + by=None, + ax=None, + fontsize=None, + rot=0, + grid=True, + figsize=None, + layout=None, + return_type=None, + backend=None, + **kwargs, + ): # noqa: PR01, RT01, D200 + """ + Make a box plot from ``DataFrame`` columns. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + ErrorMessage.not_implemented() + return to_pandas(self).boxplot( + column=column, + by=by, + ax=ax, + fontsize=fontsize, + rot=rot, + grid=grid, + figsize=figsize, + layout=layout, + return_type=return_type, + backend=backend, + **kwargs, + ) + + def combine( + self, other, func, fill_value=None, overwrite=True + ): # noqa: PR01, RT01, D200 + """ + Perform column-wise combine with another ``DataFrame``. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + ErrorMessage.not_implemented() + return super().combine(other, func, fill_value=fill_value, overwrite=overwrite) + + def compare( + self, + other, + align_axis=1, + keep_shape: bool = False, + keep_equal: bool = False, + result_names=("self", "other"), + ) -> DataFrame: # noqa: PR01, RT01, D200 + """ + Compare to another ``DataFrame`` and show the differences. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + ErrorMessage.not_implemented() + if not isinstance(other, DataFrame): + raise TypeError(f"Cannot compare DataFrame to {type(other)}") + other = self._validate_other(other, 0, compare_index=True) + return self.__constructor__( + query_compiler=self._query_compiler.compare( + other, + align_axis=align_axis, + keep_shape=keep_shape, + keep_equal=keep_equal, + result_names=result_names, + ) + ) + + def corr( + self, method="pearson", min_periods=1, numeric_only=False + ): # noqa: PR01, RT01, D200 + """ + Compute pairwise correlation of columns, excluding NA/null values. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + ErrorMessage.not_implemented() + if not numeric_only: + return self._default_to_pandas( + pandas.DataFrame.corr, + method=method, + min_periods=min_periods, + numeric_only=numeric_only, + ) + return self.__constructor__( + query_compiler=self._query_compiler.corr( + method=method, + min_periods=min_periods, + ) + ) + + def corrwith( + self, other, axis=0, drop=False, method="pearson", numeric_only=False + ): # noqa: PR01, RT01, D200 + """ + Compute pairwise correlation. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + ErrorMessage.not_implemented() + if isinstance(other, DataFrame): + other = other._query_compiler.to_pandas() + return self._default_to_pandas( + pandas.DataFrame.corrwith, + other, + axis=axis, + drop=drop, + method=method, + numeric_only=numeric_only, + ) + + def cov( + self, + min_periods: int | None = None, + ddof: int | None = 1, + numeric_only: bool = False, + ): + """ + Compute pairwise covariance of columns, excluding NA/null values. 
+ """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + ErrorMessage.not_implemented() + return self.__constructor__( + query_compiler=self._query_compiler.cov( + min_periods=min_periods, + ddof=ddof, + numeric_only=numeric_only, + ) + ) + + def dot(self, other): # noqa: PR01, RT01, D200 + """ + Compute the matrix multiplication between the ``DataFrame`` and `other`. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + ErrorMessage.not_implemented() # pragma: no cover + + if isinstance(other, BasePandasDataset): + common = self.columns.union(other.index) + if len(common) > len(self.columns) or len(common) > len( + other + ): # pragma: no cover + raise ValueError("Matrices are not aligned") + + if isinstance(other, DataFrame): + return self.__constructor__( + query_compiler=self._query_compiler.dot( + other.reindex(index=common), squeeze_self=False + ) + ) + else: + return self._reduce_dimension( + query_compiler=self._query_compiler.dot( + other.reindex(index=common), squeeze_self=False + ) + ) + + other = np.asarray(other) + if self.shape[1] != other.shape[0]: + raise ValueError( + f"Dot product shape mismatch, {self.shape} vs {other.shape}" + ) + + if len(other.shape) > 1: + return self.__constructor__( + query_compiler=self._query_compiler.dot(other, squeeze_self=False) + ) + + return self._reduce_dimension( + query_compiler=self._query_compiler.dot(other, squeeze_self=False) + ) + + def eq(self, other, axis="columns", level=None): # noqa: PR01, RT01, D200 + """ + Perform equality comparison of ``DataFrame`` and `other` (binary operator `eq`). + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + return self._binary_op("eq", other, axis=axis, level=level) + + def equals(self, other): # noqa: PR01, RT01, D200 + """ + Test whether two objects contain the same elements. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + ErrorMessage.not_implemented() # pragma: no cover + + if isinstance(other, pandas.DataFrame): + # Copy into a Modin DataFrame to simplify logic below + other = self.__constructor__(other) + return ( + self.index.equals(other.index) + and self.columns.equals(other.columns) + and self.eq(other).all().all() + ) + + def _update_var_dicts_in_kwargs(self, expr, kwargs): + """ + Copy variables with "@" prefix in `local_dict` and `global_dict` keys of kwargs. + + Parameters + ---------- + expr : str + The expression string to search variables with "@" prefix. + kwargs : dict + See the documentation for eval() for complete details on the keyword arguments accepted by query(). + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + if "@" not in expr: + return + frame = sys._getframe() + try: + f_locals = frame.f_back.f_back.f_back.f_back.f_locals + f_globals = frame.f_back.f_back.f_back.f_back.f_globals + finally: + del frame + local_names = set(re.findall(r"@([\w]+)", expr)) + local_dict = {} + global_dict = {} + + for name in local_names: + for dct_out, dct_in in ((local_dict, f_locals), (global_dict, f_globals)): + try: + dct_out[name] = dct_in[name] + except KeyError: + pass + + if local_dict: + local_dict.update(kwargs.get("local_dict") or {}) + kwargs["local_dict"] = local_dict + if global_dict: + global_dict.update(kwargs.get("global_dict") or {}) + kwargs["global_dict"] = global_dict + + def eval(self, expr, inplace=False, **kwargs): # noqa: PR01, RT01, D200 + """ + Evaluate a string describing operations on ``DataFrame`` columns. 
+ """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + ErrorMessage.not_implemented() + self._validate_eval_query(expr, **kwargs) + inplace = validate_bool_kwarg(inplace, "inplace") + self._update_var_dicts_in_kwargs(expr, kwargs) + new_query_compiler = self._query_compiler.eval(expr, **kwargs) + return_type = type( + pandas.DataFrame(columns=self.columns) + .astype(self.dtypes) + .eval(expr, **kwargs) + ).__name__ + if return_type == type(self).__name__: + return self._create_or_update_from_compiler(new_query_compiler, inplace) + else: + if inplace: + raise ValueError("Cannot operate inplace if there is no assignment") + return getattr(sys.modules[self.__module__], return_type)( + query_compiler=new_query_compiler + ) + + def fillna( + self, + value: Hashable | Mapping | Series | DataFrame = None, + *, + method: FillnaOptions | None = None, + axis: Axis | None = None, + inplace: bool = False, + limit: int | None = None, + downcast: dict | None = None, + ) -> DataFrame | None: + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + return super().fillna( + self_is_series=False, + value=value, + method=method, + axis=axis, + inplace=inplace, + limit=limit, + downcast=downcast, + ) + + def floordiv( + self, other, axis="columns", level=None, fill_value=None + ): # noqa: PR01, RT01, D200 + """ + Get integer division of ``DataFrame`` and `other`, element-wise (binary operator `floordiv`). + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + return self._binary_op( + "floordiv", + other, + axis=axis, + level=level, + fill_value=fill_value, + ) + + @classmethod + def from_dict( + cls, data, orient="columns", dtype=None, columns=None + ): # pragma: no cover # noqa: PR01, RT01, D200 + """ + Construct ``DataFrame`` from dict of array-like or dicts. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + ErrorMessage.not_implemented() + return from_pandas( + pandas.DataFrame.from_dict( + data, orient=orient, dtype=dtype, columns=columns + ) + ) + + @classmethod + def from_records( + cls, + data, + index=None, + exclude=None, + columns=None, + coerce_float=False, + nrows=None, + ): # pragma: no cover # noqa: PR01, RT01, D200 + """ + Convert structured or record ndarray to ``DataFrame``. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + ErrorMessage.not_implemented() + return from_pandas( + pandas.DataFrame.from_records( + data, + index=index, + exclude=exclude, + columns=columns, + coerce_float=coerce_float, + nrows=nrows, + ) + ) + + def ge(self, other, axis="columns", level=None): # noqa: PR01, RT01, D200 + """ + Get greater than or equal comparison of ``DataFrame`` and `other`, element-wise (binary operator `ge`). + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + return self._binary_op("ge", other, axis=axis, level=level) + + def gt(self, other, axis="columns", level=None): # noqa: PR01, RT01, D200 + """ + Get greater than comparison of ``DataFrame`` and `other`, element-wise (binary operator `ge`). 
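+
+ Examples
+ --------
+ Minimal illustrative sketch (assumes ``pd`` is the Snowpark pandas namespace
+ and an active session; comparison against a scalar is element-wise):
+
+ >>> df = pd.DataFrame({"a": [1, 2, 3]})
+ >>> df.gt(2)["a"].tolist()  # doctest: +SKIP
+ [False, False, True]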
+ """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + return self._binary_op("gt", other, axis=axis, level=level) + + def hist( + self, + column=None, + by=None, + grid=True, + xlabelsize=None, + xrot=None, + ylabelsize=None, + yrot=None, + ax=None, + sharex=False, + sharey=False, + figsize=None, + layout=None, + bins=10, + **kwds, + ): # pragma: no cover # noqa: PR01, RT01, D200 + """ + Make a histogram of the ``DataFrame``. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + ErrorMessage.not_implemented() + return self._default_to_pandas( + pandas.DataFrame.hist, + column=column, + by=by, + grid=grid, + xlabelsize=xlabelsize, + xrot=xrot, + ylabelsize=ylabelsize, + yrot=yrot, + ax=ax, + sharex=sharex, + sharey=sharey, + figsize=figsize, + layout=layout, + bins=bins, + **kwds, + ) + + def info( + self, + verbose: bool | None = None, + buf: IO[str] | None = None, + max_cols: int | None = None, + memory_usage: bool | str | None = None, + show_counts: bool | None = None, + null_counts: bool | None = None, + ): # noqa: PR01, D200 + """ + Print a concise summary of the ``DataFrame``. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + def put_str(src, output_len=None, spaces=2): + src = str(src) + return src.ljust(output_len if output_len else len(src)) + " " * spaces + + def format_size(num): + for x in ["bytes", "KB", "MB", "GB", "TB"]: + if num < 1024.0: + return f"{num:3.1f} {x}" + num /= 1024.0 + return f"{num:3.1f} PB" + + output = [] + + type_line = str(type(self)) + index_line = "SnowflakeIndex" + columns = self.columns + columns_len = len(columns) + dtypes = self.dtypes + dtypes_line = f"dtypes: {', '.join(['{}({})'.format(dtype, count) for dtype, count in dtypes.value_counts().items()])}" + + if max_cols is None: + max_cols = 100 + + exceeds_info_cols = columns_len > max_cols + + if buf is None: + buf = sys.stdout + + if null_counts is None: + null_counts = not exceeds_info_cols + + if verbose is None: + verbose = not exceeds_info_cols + + if null_counts and verbose: + # We're gonna take items from `non_null_count` in a loop, which + # works kinda slow with `Modin.Series`, that's why we call `_to_pandas()` here + # that will be faster. 
+ non_null_count = self.count()._to_pandas() + + if memory_usage is None: + memory_usage = True + + def get_header(spaces=2): + output = [] + head_label = " # " + column_label = "Column" + null_label = "Non-Null Count" + dtype_label = "Dtype" + non_null_label = " non-null" + delimiter = "-" + + lengths = {} + lengths["head"] = max(len(head_label), len(pprint_thing(len(columns)))) + lengths["column"] = max( + len(column_label), max(len(pprint_thing(col)) for col in columns) + ) + lengths["dtype"] = len(dtype_label) + dtype_spaces = ( + max(lengths["dtype"], max(len(pprint_thing(dtype)) for dtype in dtypes)) + - lengths["dtype"] + ) + + header = put_str(head_label, lengths["head"]) + put_str( + column_label, lengths["column"] + ) + if null_counts: + lengths["null"] = max( + len(null_label), + max(len(pprint_thing(x)) for x in non_null_count) + + len(non_null_label), + ) + header += put_str(null_label, lengths["null"]) + header += put_str(dtype_label, lengths["dtype"], spaces=dtype_spaces) + + output.append(header) + + delimiters = put_str(delimiter * lengths["head"]) + put_str( + delimiter * lengths["column"] + ) + if null_counts: + delimiters += put_str(delimiter * lengths["null"]) + delimiters += put_str(delimiter * lengths["dtype"], spaces=dtype_spaces) + output.append(delimiters) + + return output, lengths + + output.extend([type_line, index_line]) + + def verbose_repr(output): + columns_line = f"Data columns (total {len(columns)} columns):" + header, lengths = get_header() + output.extend([columns_line, *header]) + for i, col in enumerate(columns): + i, col_s, dtype = map(pprint_thing, [i, col, dtypes[col]]) + + to_append = put_str(f" {i}", lengths["head"]) + put_str( + col_s, lengths["column"] + ) + if null_counts: + non_null = pprint_thing(non_null_count[col]) + to_append += put_str(f"{non_null} non-null", lengths["null"]) + to_append += put_str(dtype, lengths["dtype"], spaces=0) + output.append(to_append) + + def non_verbose_repr(output): + output.append(columns._summary(name="Columns")) + + if verbose: + verbose_repr(output) + else: + non_verbose_repr(output) + + output.append(dtypes_line) + + if memory_usage: + deep = memory_usage == "deep" + mem_usage_bytes = self.memory_usage(index=True, deep=deep).sum() + mem_line = f"memory usage: {format_size(mem_usage_bytes)}" + + output.append(mem_line) + + output.append("") + buf.write("\n".join(output)) + + def insert( + self, + loc: int, + column: Hashable, + value: Scalar | AnyArrayLike, + allow_duplicates: bool | NoDefault = no_default, + ) -> None: + """ + Insert column into ``DataFrame`` at specified location. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + raise_if_native_pandas_objects(value) + if allow_duplicates is no_default: + allow_duplicates = False + if not allow_duplicates and column in self.columns: + raise ValueError(f"cannot insert {column}, already exists") + + if not isinstance(loc, int): + raise TypeError("loc must be int") + + # If columns labels are multilevel, we implement following behavior (this is + # name native pandas): + # Case 1: if 'column' is tuple it's length must be same as number of levels + # otherwise raise error. + # Case 2: if 'column' is not a tuple, create a tuple out of it by filling in + # empty strings to match the length of column levels in self frame. + if self.columns.nlevels > 1: + if isinstance(column, tuple) and len(column) != self.columns.nlevels: + # same error as native pandas. 
+ raise ValueError("Item must have length equal to number of levels.") + if not isinstance(column, tuple): + # Fill empty strings to match length of levels + suffix = [""] * (self.columns.nlevels - 1) + column = tuple([column] + suffix) + + # Dictionary keys are treated as index column and this should be joined with + # index of target dataframe. This behavior is similar to 'value' being DataFrame + # or Series, so we simply create Series from dict data here. + if isinstance(value, dict): + value = Series(value, name=column) + + if isinstance(value, DataFrame) or ( + isinstance(value, np.ndarray) and len(value.shape) > 1 + ): + # Supported numpy array shapes are + # 1. (N, ) -> Ex. [1, 2, 3] + # 2. (N, 1) -> Ex> [[1], [2], [3]] + if value.shape[1] != 1: + if isinstance(value, DataFrame): + # Error message updated in pandas 2.1, needs to be upstreamed to OSS modin + raise ValueError( + f"Expected a one-dimensional object, got a {type(value).__name__} with {value.shape[1]} columns instead." + ) + else: + raise ValueError( + f"Expected a 1D array, got an array with shape {value.shape}" + ) + # Change numpy array shape from (N, 1) to (N, ) + if isinstance(value, np.ndarray): + value = value.squeeze(axis=1) + + if ( + is_list_like(value) + and not isinstance(value, (Series, DataFrame)) + and len(value) != self.shape[0] + and not 0 == self.shape[0] # dataframe holds no rows + ): + raise ValueError( + "Length of values ({}) does not match length of index ({})".format( + len(value), len(self) + ) + ) + if not -len(self.columns) <= loc <= len(self.columns): + raise IndexError( + f"index {loc} is out of bounds for axis 0 with size {len(self.columns)}" + ) + elif loc < 0: + raise ValueError("unbounded slice") + + join_on_index = False + if isinstance(value, (Series, DataFrame)): + value = value._query_compiler + join_on_index = True + elif is_list_like(value): + value = Series(value, name=column)._query_compiler + + new_query_compiler = self._query_compiler.insert( + loc, column, value, join_on_index + ) + # In pandas, 'insert' operation is always inplace. + self._update_inplace(new_query_compiler=new_query_compiler) + + def interpolate( + self, + method="linear", + axis=0, + limit=None, + inplace=False, + limit_direction: str | None = None, + limit_area=None, + downcast=None, + **kwargs, + ): # noqa: PR01, RT01, D200 + """ + Fill NaN values using an interpolation method. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + ErrorMessage.not_implemented() + return self._default_to_pandas( + pandas.DataFrame.interpolate, + method=method, + axis=axis, + limit=limit, + inplace=inplace, + limit_direction=limit_direction, + limit_area=limit_area, + downcast=downcast, + **kwargs, + ) + + def iterrows(self) -> Iterator[tuple[Hashable, Series]]: + """ + Iterate over ``DataFrame`` rows as (index, ``Series``) pairs. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + def iterrow_builder(s): + """Return tuple of the given `s` parameter name and the parameter themselves.""" + return s.name, s + + # Raise warning message since iterrows is very inefficient. + WarningMessage.single_warning( + DF_ITERROWS_ITERTUPLES_WARNING_MESSAGE.format("DataFrame.iterrows") + ) + + partition_iterator = SnowparkPandasRowPartitionIterator(self, iterrow_builder) + yield from partition_iterator + + def items(self): # noqa: D200 + """ + Iterate over (column name, ``Series``) pairs. 
+ """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + ErrorMessage.not_implemented() + + def items_builder(s): + """Return tuple of the given `s` parameter name and the parameter themselves.""" + return s.name, s + + partition_iterator = PartitionIterator(self, 1, items_builder) + yield from partition_iterator + + def iteritems(self): # noqa: RT01, D200 + """ + Iterate over (column name, ``Series``) pairs. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + ErrorMessage.not_implemented() + return self.items() + + def itertuples( + self, index: bool = True, name: str | None = "Pandas" + ) -> Iterable[tuple[Any, ...]]: + """ + Iterate over ``DataFrame`` rows as ``namedtuple``-s. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + + def itertuples_builder(s): + """Return the next namedtuple.""" + # s is the Series of values in the current row. + fields = [] # column names + data = [] # values under each column + + if index: + data.append(s.name) + fields.append("Index") + + # Fill column names and values. + fields.extend(list(self.columns)) + data.extend(s) + + if name is not None: + # Creating the namedtuple. + itertuple = collections.namedtuple(name, fields, rename=True) + return itertuple._make(data) + + # When the name is None, return a regular tuple. + return tuple(data) + + # Raise warning message since itertuples is very inefficient. + WarningMessage.single_warning( + DF_ITERROWS_ITERTUPLES_WARNING_MESSAGE.format("DataFrame.itertuples") + ) + return SnowparkPandasRowPartitionIterator(self, itertuples_builder, True) + + def join( + self, + other: DataFrame | Series | Iterable[DataFrame | Series], + on: IndexLabel | None = None, + how: str = "left", + lsuffix: str = "", + rsuffix: str = "", + sort: bool = False, + validate: str | None = None, + ) -> DataFrame: + """ + Join columns of another ``DataFrame``. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + for o in other if isinstance(other, list) else [other]: + raise_if_native_pandas_objects(o) + + # Similar to native pandas we implement 'join' using 'pd.merge' method. + # Following code is copied from native pandas (with few changes explained below) + # https://github.com/pandas-dev/pandas/blob/v1.5.3/pandas/core/frame.py#L10002 + if isinstance(other, Series): + # Same error as native pandas. + if other.name is None: + raise ValueError("Other Series must have a name") + other = DataFrame(other) + elif is_list_like(other): + if any([isinstance(o, Series) and o.name is None for o in other]): + raise ValueError("Other Series must have a name") + + if isinstance(other, DataFrame): + if how == "cross": + return pd.merge( + self, + other, + how=how, + on=on, + suffixes=(lsuffix, rsuffix), + sort=sort, + validate=validate, + ) + return pd.merge( + self, + other, + left_on=on, + how=how, + left_index=on is None, + right_index=True, + suffixes=(lsuffix, rsuffix), + sort=sort, + validate=validate, + ) + else: # List of DataFrame/Series + # Same error as native pandas. + if on is not None: + raise ValueError( + "Joining multiple DataFrames only supported for joining on index" + ) + + # Same error as native pandas. + if rsuffix or lsuffix: + raise ValueError( + "Suffixes not supported when joining multiple DataFrames" + ) + + # NOTE: These are not the differences between Snowpark pandas API and pandas behavior + # these are differences between native pandas join behavior when join + # frames have unique index or not. 
+ + # In native pandas logic to join multiple DataFrames/Series is data + # dependent. Under the hood it will either use 'concat' or 'merge' API + # Case 1. If all objects being joined have unique index use 'concat' (axis=1) + # Case 2. Otherwise use 'merge' API by looping through objects left to right. + # https://github.com/pandas-dev/pandas/blob/v1.5.3/pandas/core/frame.py#L10046 + + # Even though concat (axis=1) and merge are very similar APIs they have + # some differences which leads to inconsistent behavior in native pandas. + # 1. Treatment of un-named Series + # Case #1: Un-named series is allowed in concat API. Objects are joined + # successfully by assigning a number as columns name (see 'concat' API + # documentation for details on treatment of un-named series). + # Case #2: It raises 'ValueError: Other Series must have a name' + + # 2. how='right' + # Case #1: 'concat' API doesn't support right join. It raises + # 'ValueError: Only can inner (intersect) or outer (union) join the other axis' + # Case #2: Merges successfully. + + # 3. Joining frames with duplicate labels but no conflict with other frames + # Example: self = DataFrame(... columns=["A", "B"]) + # other = [DataFrame(... columns=["C", "C"])] + # Case #1: 'ValueError: Indexes have overlapping values' + # Case #2: Merged successfully. + + # In addition to this, native pandas implementation also leads to another + # type of inconsistency where left.join(other, ...) and + # left.join([other], ...) might behave differently for cases mentioned + # above. + # Example: + # import pandas as pd + # df = pd.DataFrame({"a": [4, 5]}) + # other = pd.Series([1, 2]) + # df.join([other]) # this is successful + # df.join(other) # this raises 'ValueError: Other Series must have a name' + + # In Snowpark pandas API, we provide consistent behavior by always using 'merge' API + # to join multiple DataFrame/Series. So always follow the behavior + # documented as Case #2 above. + + joined = self + for frame in other: + if isinstance(frame, DataFrame): + overlapping_cols = set(joined.columns).intersection( + set(frame.columns) + ) + if len(overlapping_cols) > 0: + # Native pandas raises: 'Indexes have overlapping values' + # We differ slightly from native pandas message to make it more + # useful to users. + raise ValueError( + f"Join dataframes have overlapping column labels: {overlapping_cols}" + ) + joined = pd.merge( + joined, + frame, + how=how, + left_index=True, + right_index=True, + validate=validate, + sort=sort, + suffixes=(None, None), + ) + return joined + + def isna(self): + return super().isna() + + def isnull(self): + return super().isnull() + + def isetitem(self, loc, value): + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + ErrorMessage.not_implemented() + return self._default_to_pandas( + pandas.DataFrame.isetitem, + loc=loc, + value=value, + ) + + def le(self, other, axis="columns", level=None): # noqa: PR01, RT01, D200 + """ + Get less than or equal comparison of ``DataFrame`` and `other`, element-wise (binary operator `le`). + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + return self._binary_op("le", other, axis=axis, level=level) + + def lt(self, other, axis="columns", level=None): # noqa: PR01, RT01, D200 + """ + Get less than comparison of ``DataFrame`` and `other`, element-wise (binary operator `le`). 
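+
+ Examples
+ --------
+ Minimal illustrative sketch (assumes ``pd`` is the Snowpark pandas namespace
+ and an active session; a list-like ``other`` is aligned to columns under the
+ default ``axis="columns"``):
+
+ >>> df = pd.DataFrame({"a": [1, 5], "b": [2, 2]})
+ >>> df.lt([3, 2])["a"].tolist()  # doctest: +SKIP
+ [True, False]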
+ """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + return self._binary_op("lt", other, axis=axis, level=level) + + def melt( + self, + id_vars=None, + value_vars=None, + var_name=None, + value_name="value", + col_level=None, + ignore_index=True, + ): # noqa: PR01, RT01, D200 + """ + Unpivot a ``DataFrame`` from wide to long format, optionally leaving identifiers set. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + if id_vars is None: + id_vars = [] + if not is_list_like(id_vars): + id_vars = [id_vars] + if value_vars is None: + # Behavior of Index.difference changed in 2.2.x + # https://github.com/pandas-dev/pandas/pull/55113 + # This change needs upstream to Modin: + # https://github.com/modin-project/modin/issues/7206 + value_vars = self.columns.drop(id_vars) + if var_name is None: + columns_name = self._query_compiler.get_index_name(axis=1) + var_name = columns_name if columns_name is not None else "variable" + return self.__constructor__( + query_compiler=self._query_compiler.melt( + id_vars=id_vars, + value_vars=value_vars, + var_name=var_name, + value_name=value_name, + col_level=col_level, + ignore_index=ignore_index, + ) + ) + + def memory_usage(self, index=True, deep=False): # noqa: PR01, RT01, D200 + """ + Return the memory usage of each column in bytes. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + ErrorMessage.not_implemented() # pragma: no cover + + if index: + result = self._reduce_dimension( + self._query_compiler.memory_usage(index=False, deep=deep) + ) + index_value = self.index.memory_usage(deep=deep) + return pd.concat( + [Series(index_value, index=["Index"]), result] + ) # pragma: no cover + return super().memory_usage(index=index, deep=deep) + + def merge( + self, + right: DataFrame | Series, + how: str = "inner", + on: IndexLabel | None = None, + left_on: Hashable + | AnyArrayLike + | Sequence[Hashable | AnyArrayLike] + | None = None, + right_on: Hashable + | AnyArrayLike + | Sequence[Hashable | AnyArrayLike] + | None = None, + left_index: bool = False, + right_index: bool = False, + sort: bool = False, + suffixes: Suffixes = ("_x", "_y"), + copy: bool = True, + indicator: bool = False, + validate: str | None = None, + ) -> DataFrame: + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + # Raise error if native pandas objects are passed. + raise_if_native_pandas_objects(right) + + if isinstance(right, Series) and right.name is None: + raise ValueError("Cannot merge a Series without a name") + if not isinstance(right, (Series, DataFrame)): + raise TypeError( + f"Can only merge Series or DataFrame objects, a {type(right)} was passed" + ) + + if isinstance(right, Series): + right_column_nlevels = ( + len(right.name) if isinstance(right.name, tuple) else 1 + ) + else: + right_column_nlevels = right.columns.nlevels + if self.columns.nlevels != right_column_nlevels: + # This is deprecated in native pandas. We raise explicit error for this. + raise ValueError( + "Can not merge objects with different column levels." + + f" ({self.columns.nlevels} levels on the left," + + f" {right_column_nlevels} on the right)" + ) + + # Merge empty native pandas dataframes for error checking. Otherwise, it will + # require a lot of logic to be written. This takes care of raising errors for + # following scenarios: + # 1. Only 'left_index' is set to True. + # 2. Only 'right_index is set to True. + # 3. Only 'left_on' is provided. + # 4. Only 'right_on' is provided. + # 5. 
'on' and 'left_on' both are provided + # 6. 'on' and 'right_on' both are provided + # 7. 'on' and 'left_index' both are provided + # 8. 'on' and 'right_index' both are provided + # 9. 'left_on' and 'left_index' both are provided + # 10. 'right_on' and 'right_index' both are provided + # 11. Length mismatch between 'left_on' and 'right_on' + # 12. 'left_index' is not a bool + # 13. 'right_index' is not a bool + # 14. 'on' is not None and how='cross' + # 15. 'left_on' is not None and how='cross' + # 16. 'right_on' is not None and how='cross' + # 17. 'left_index' is True and how='cross' + # 18. 'right_index' is True and how='cross' + # 19. Unknown label in 'on', 'left_on' or 'right_on' + # 20. Provided 'suffixes' is not sufficient to resolve conflicts. + # 21. Merging on column with duplicate labels. + # 22. 'how' not in {'left', 'right', 'inner', 'outer', 'cross'} + # 23. conflict with existing labels for array-like join key + # 24. 'indicator' argument is not bool or str + # 25. indicator column label conflicts with existing data labels + create_empty_native_pandas_frame(self).merge( + create_empty_native_pandas_frame(right), + on=on, + how=how, + left_on=replace_external_data_keys_with_empty_pandas_series(left_on), + right_on=replace_external_data_keys_with_empty_pandas_series(right_on), + left_index=left_index, + right_index=right_index, + suffixes=suffixes, + indicator=indicator, + ) + + return self.__constructor__( + query_compiler=self._query_compiler.merge( + right._query_compiler, + how=how, + on=on, + left_on=replace_external_data_keys_with_query_compiler(self, left_on), + right_on=replace_external_data_keys_with_query_compiler( + right, right_on + ), + left_index=left_index, + right_index=right_index, + sort=sort, + suffixes=suffixes, + copy=copy, + indicator=indicator, + validate=validate, + ) + ) + + def mod( + self, other, axis="columns", level=None, fill_value=None + ): # noqa: PR01, RT01, D200 + """ + Get modulo of ``DataFrame`` and `other`, element-wise (binary operator `mod`). + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + return self._binary_op( + "mod", + other, + axis=axis, + level=level, + fill_value=fill_value, + ) + + def mul( + self, other, axis="columns", level=None, fill_value=None + ): # noqa: PR01, RT01, D200 + """ + Get multiplication of ``DataFrame`` and `other`, element-wise (binary operator `mul`). + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + return self._binary_op( + "mul", + other, + axis=axis, + level=level, + fill_value=fill_value, + ) + + multiply = mul + + def rmul( + self, other, axis="columns", level=None, fill_value=None + ): # noqa: PR01, RT01, D200 + """ + Get multiplication of ``DataFrame`` and `other`, element-wise (binary operator `mul`). + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + return self._binary_op( + "rmul", + other, + axis=axis, + level=level, + fill_value=fill_value, + ) + + def ne(self, other, axis="columns", level=None): # noqa: PR01, RT01, D200 + """ + Get not equal comparison of ``DataFrame`` and `other`, element-wise (binary operator `ne`). + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + return self._binary_op("ne", other, axis=axis, level=level) + + def nlargest(self, n, columns, keep="first"): # noqa: PR01, RT01, D200 + """ + Return the first `n` rows ordered by `columns` in descending order. 
+ """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + ErrorMessage.not_implemented() + return self.__constructor__( + query_compiler=self._query_compiler.nlargest(n, columns, keep) + ) + + def nsmallest(self, n, columns, keep="first"): # noqa: PR01, RT01, D200 + """ + Return the first `n` rows ordered by `columns` in ascending order. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + ErrorMessage.not_implemented() + return self.__constructor__( + query_compiler=self._query_compiler.nsmallest( + n=n, columns=columns, keep=keep + ) + ) + + def unstack(self, level=-1, fill_value=None): # noqa: PR01, RT01, D200 + """ + Pivot a level of the (necessarily hierarchical) index labels. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + ErrorMessage.not_implemented() + if not isinstance(self.index, pandas.MultiIndex) or ( + isinstance(self.index, pandas.MultiIndex) + and is_list_like(level) + and len(level) == self.index.nlevels + ): + return self._reduce_dimension( + query_compiler=self._query_compiler.unstack(level, fill_value) + ) + else: + return self.__constructor__( + query_compiler=self._query_compiler.unstack(level, fill_value) + ) + + def pivot(self, index=None, columns=None, values=None): # noqa: PR01, RT01, D200 + """ + Return reshaped ``DataFrame`` organized by given index / column values. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + ErrorMessage.not_implemented() + return self.__constructor__( + query_compiler=self._query_compiler.pivot( + index=index, columns=columns, values=values + ) + ) + + def pivot_table( + self, + values=None, + index=None, + columns=None, + aggfunc="mean", + fill_value=None, + margins=False, + dropna=True, + margins_name="All", + observed=False, + sort=True, + ): + """ + Create a spreadsheet-style pivot table as a ``DataFrame``. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + result = self.__constructor__( + query_compiler=self._query_compiler.pivot_table( + index=index, + values=values, + columns=columns, + aggfunc=aggfunc, + fill_value=fill_value, + margins=margins, + dropna=dropna, + margins_name=margins_name, + observed=observed, + sort=sort, + ) + ) + return result + + @property + def plot( + self, + x=None, + y=None, + kind="line", + ax=None, + subplots=False, + sharex=None, + sharey=False, + layout=None, + figsize=None, + use_index=True, + title=None, + grid=None, + legend=True, + style=None, + logx=False, + logy=False, + loglog=False, + xticks=None, + yticks=None, + xlim=None, + ylim=None, + rot=None, + fontsize=None, + colormap=None, + table=False, + yerr=None, + xerr=None, + secondary_y=False, + sort_columns=False, + **kwargs, + ): # noqa: PR01, RT01, D200 + """ + Make plots of ``DataFrame``. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + ErrorMessage.not_implemented() + return self._to_pandas().plot + + def pow( + self, other, axis="columns", level=None, fill_value=None + ): # noqa: PR01, RT01, D200 + """ + Get exponential power of ``DataFrame`` and `other`, element-wise (binary operator `pow`). + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + return self._binary_op( + "pow", + other, + axis=axis, + level=level, + fill_value=fill_value, + ) + + def prod( + self, + axis=None, + skipna=True, + numeric_only=False, + min_count=0, + **kwargs, + ): # noqa: PR01, RT01, D200 + """ + Return the product of the values over the requested axis. 
+ """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + ErrorMessage.not_implemented() + validate_bool_kwarg(skipna, "skipna", none_allowed=False) + axis = self._get_axis_number(axis) + axis_to_apply = self.columns if axis else self.index + if ( + skipna is not False + and numeric_only is None + and min_count > len(axis_to_apply) + ): + new_index = self.columns if not axis else self.index + return Series( + [np.nan] * len(new_index), index=new_index, dtype=np.dtype("object") + ) + + data = self._validate_dtypes_sum_prod_mean(axis, numeric_only, ignore_axis=True) + if min_count > 1: + return data._reduce_dimension( + data._query_compiler.prod_min_count( + axis=axis, + skipna=skipna, + numeric_only=numeric_only, + min_count=min_count, + **kwargs, + ) + ) + return data._reduce_dimension( + data._query_compiler.prod( + axis=axis, + skipna=skipna, + numeric_only=numeric_only, + min_count=min_count, + **kwargs, + ) + ) + + product = prod + + def quantile( + self, + q: Scalar | ListLike = 0.5, + axis: Axis = 0, + numeric_only: bool = False, + interpolation: Literal[ + "linear", "lower", "higher", "midpoint", "nearest" + ] = "linear", + method: Literal["single", "table"] = "single", + ): + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + return super().quantile( + q=q, + axis=axis, + numeric_only=numeric_only, + interpolation=interpolation, + method=method, + ) + + def query(self, expr, inplace=False, **kwargs): # noqa: PR01, RT01, D200 + """ + Query the columns of a ``DataFrame`` with a boolean expression. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + ErrorMessage.not_implemented() + self._update_var_dicts_in_kwargs(expr, kwargs) + self._validate_eval_query(expr, **kwargs) + inplace = validate_bool_kwarg(inplace, "inplace") + new_query_compiler = self._query_compiler.query(expr, **kwargs) + return self._create_or_update_from_compiler(new_query_compiler, inplace) + + def rename( + self, + mapper: Renamer | None = None, + *, + index: Renamer | None = None, + columns: Renamer | None = None, + axis: Axis | None = None, + copy: bool | None = None, + inplace: bool = False, + level: Level | None = None, + errors: IgnoreRaise = "ignore", + ) -> DataFrame | None: + """ + Alter axes labels. 
+ """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + inplace = validate_bool_kwarg(inplace, "inplace") + if mapper is None and index is None and columns is None: + raise TypeError("must pass an index to rename") + + if index is not None or columns is not None: + if axis is not None: + raise TypeError( + "Cannot specify both 'axis' and any of 'index' or 'columns'" + ) + elif mapper is not None: + raise TypeError( + "Cannot specify both 'mapper' and any of 'index' or 'columns'" + ) + else: + # use the mapper argument + if axis and self._get_axis_number(axis) == 1: + columns = mapper + else: + index = mapper + + if copy is not None: + WarningMessage.ignored_argument( + operation="dataframe.rename", + argument="copy", + message="copy parameter has been ignored with Snowflake execution engine", + ) + + if isinstance(index, dict): + index = Series(index) + + new_qc = self._query_compiler.rename( + index_renamer=index, columns_renamer=columns, level=level, errors=errors + ) + return self._create_or_update_from_compiler( + new_query_compiler=new_qc, inplace=inplace + ) + + def reindex( + self, + labels=None, + index=None, + columns=None, + axis=None, + method=None, + copy=None, + level=None, + fill_value=np.nan, + limit=None, + tolerance=None, + ): # noqa: PR01, RT01, D200 + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + ErrorMessage.not_implemented() # pragma: no cover + + axis = self._get_axis_number(axis) + if axis == 0 and labels is not None: + index = labels + elif labels is not None: + columns = labels + return super().reindex( + index=index, + columns=columns, + method=method, + copy=copy, + level=level, + fill_value=fill_value, + limit=limit, + tolerance=tolerance, + ) + + def replace( + self, + to_replace=None, + value=no_default, + inplace: bool = False, + limit=None, + regex: bool = False, + method: str | NoDefault = no_default, + ): + """ + Replace values given in `to_replace` with `value`. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + inplace = validate_bool_kwarg(inplace, "inplace") + new_query_compiler = self._query_compiler.replace( + to_replace=to_replace, + value=value, + limit=limit, + regex=regex, + method=method, + ) + return self._create_or_update_from_compiler(new_query_compiler, inplace) + + def rfloordiv( + self, other, axis="columns", level=None, fill_value=None + ): # noqa: PR01, RT01, D200 + """ + Get integer division of ``DataFrame`` and `other`, element-wise (binary operator `rfloordiv`). + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + return self._binary_op( + "rfloordiv", + other, + axis=axis, + level=level, + fill_value=fill_value, + ) + + def radd( + self, other, axis="columns", level=None, fill_value=None + ): # noqa: PR01, RT01, D200 + """ + Get addition of ``DataFrame`` and `other`, element-wise (binary operator `radd`). + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + return self._binary_op( + "radd", + other, + axis=axis, + level=level, + fill_value=fill_value, + ) + + def rmod( + self, other, axis="columns", level=None, fill_value=None + ): # noqa: PR01, RT01, D200 + """ + Get modulo of ``DataFrame`` and `other`, element-wise (binary operator `rmod`). 
+ """ + return self._binary_op( + "rmod", + other, + axis=axis, + level=level, + fill_value=fill_value, + ) + + def round(self, decimals=0, *args, **kwargs): # noqa: PR01, RT01, D200 + return super().round(decimals, args=args, **kwargs) + + def rpow( + self, other, axis="columns", level=None, fill_value=None + ): # noqa: PR01, RT01, D200 + """ + Get exponential power of ``DataFrame`` and `other`, element-wise (binary operator `rpow`). + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + return self._binary_op( + "rpow", + other, + axis=axis, + level=level, + fill_value=fill_value, + ) + + def rsub( + self, other, axis="columns", level=None, fill_value=None + ): # noqa: PR01, RT01, D200 + """ + Get subtraction of ``DataFrame`` and `other`, element-wise (binary operator `rsub`). + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + return self._binary_op( + "rsub", + other, + axis=axis, + level=level, + fill_value=fill_value, + ) + + def rtruediv( + self, other, axis="columns", level=None, fill_value=None + ): # noqa: PR01, RT01, D200 + """ + Get floating division of ``DataFrame`` and `other`, element-wise (binary operator `rtruediv`). + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + return self._binary_op( + "rtruediv", + other, + axis=axis, + level=level, + fill_value=fill_value, + ) + + rdiv = rtruediv + + def select_dtypes( + self, + include: ListLike | str | type | None = None, + exclude: ListLike | str | type | None = None, + ) -> DataFrame: + """ + Return a subset of the ``DataFrame``'s columns based on the column dtypes. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + # This line defers argument validation to pandas, which will raise errors on our behalf in cases + # like if `include` and `exclude` are None, the same type is specified in both lists, or a string + # dtype (as opposed to object) is specified. + pandas.DataFrame().select_dtypes(include, exclude) + + if include and not is_list_like(include): + include = [include] + elif include is None: + include = [] + if exclude and not is_list_like(exclude): + exclude = [exclude] + elif exclude is None: + exclude = [] + + sel = tuple(map(set, (include, exclude))) + + # The width of the np.int_/float_ alias differs between Windows and other platforms, so + # we need to include a workaround. 
+ # https://github.com/numpy/numpy/issues/9464 + # https://github.com/pandas-dev/pandas/blob/f538741432edf55c6b9fb5d0d496d2dd1d7c2457/pandas/core/frame.py#L5036 + def check_sized_number_infer_dtypes(dtype): + if (isinstance(dtype, str) and dtype == "int") or (dtype is int): + return [np.int32, np.int64] + elif dtype == "float" or dtype is float: + return [np.float64, np.float32] + else: + return [infer_dtype_from_object(dtype)] + + include, exclude = map( + lambda x: set( + itertools.chain.from_iterable(map(check_sized_number_infer_dtypes, x)) + ), + sel, + ) + # We need to index on column position rather than label in case of duplicates + include_these = pandas.Series(not bool(include), index=range(len(self.columns))) + exclude_these = pandas.Series(not bool(exclude), index=range(len(self.columns))) + + def is_dtype_instance_mapper(dtype): + return functools.partial(issubclass, dtype.type) + + for i, dtype in enumerate(self.dtypes): + if include: + include_these[i] = any(map(is_dtype_instance_mapper(dtype), include)) + if exclude: + exclude_these[i] = not any( + map(is_dtype_instance_mapper(dtype), exclude) + ) + + dtype_indexer = include_these & exclude_these + indicate = [i for i, should_keep in dtype_indexer.items() if should_keep] + # We need to use iloc instead of drop in case of duplicate column names + return self.iloc[:, indicate] + + def shift( + self, + periods: int = 1, + freq=None, + axis: Axis = 0, + fill_value: Hashable = no_default, + ) -> DataFrame: + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + return super().shift(periods, freq, axis, fill_value) + + def set_index( + self, + keys: IndexLabel + | list[IndexLabel | pd.Index | pd.Series | list | np.ndarray | Iterable], + drop: bool = True, + append: bool = False, + inplace: bool = False, + verify_integrity: bool = False, + ) -> None | DataFrame: + """ + Set the ``DataFrame`` index using existing columns. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + inplace = validate_bool_kwarg(inplace, "inplace") + if not isinstance(keys, list): + keys = [keys] + + # make sure key is either hashable, index, or series + label_or_series = [] + + missing = [] + columns = self.columns.tolist() + for key in keys: + raise_if_native_pandas_objects(key) + if isinstance(key, pd.Series): + label_or_series.append(key._query_compiler) + elif isinstance(key, (np.ndarray, list, Iterator)): + label_or_series.append(pd.Series(key)._query_compiler) + elif isinstance(key, pd.Index): + label_or_series += [ + s._query_compiler for s in self._to_series_list(key) + ] + else: + if not is_hashable(key): + raise TypeError( + f'The parameter "keys" may be a column key, one-dimensional array, or a list ' + f"containing only valid column keys and one-dimensional arrays. 
Received column " + f"of type {type(key)}" + ) + label_or_series.append(key) + found = key in columns + if columns.count(key) > 1: + raise ValueError(f"The column label '{key}' is not unique") + elif not found: + missing.append(key) + + if missing: + raise KeyError(f"None of {missing} are in the columns") + + new_query_compiler = self._query_compiler.set_index( + label_or_series, drop=drop, append=append + ) + + # TODO: SNOW-782633 improve this code once duplicate is supported + # this needs to pull all index which is inefficient + if verify_integrity and not new_query_compiler.index.is_unique: + duplicates = new_query_compiler.index[ + new_query_compiler.index.duplicated() + ].unique() + raise ValueError(f"Index has duplicate keys: {duplicates}") + + return self._create_or_update_from_compiler(new_query_compiler, inplace=inplace) + + sparse = CachedAccessor("sparse", SparseFrameAccessor) + + def squeeze(self, axis: Axis | None = None): + """ + Squeeze 1 dimensional axis objects into scalars. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + axis = self._get_axis_number(axis) if axis is not None else None + len_columns = self._query_compiler.get_axis_len(1) + if axis == 1 and len_columns == 1: + return Series(query_compiler=self._query_compiler) + # get_axis_len(0) results in a sql query to count number of rows in current + # dataframe. We should only compute len_index if axis is 0 or None. + len_index = len(self) + if axis is None and (len_columns == 1 or len_index == 1): + return Series(query_compiler=self._query_compiler).squeeze() + if axis == 0 and len_index == 1: + return Series(query_compiler=self.T._query_compiler) + else: + return self.copy() + + def stack(self, level=-1, dropna=True): # noqa: PR01, RT01, D200 + """ + Stack the prescribed level(s) from columns to index. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + ErrorMessage.not_implemented() + if not isinstance(self.columns, pandas.MultiIndex) or ( + isinstance(self.columns, pandas.MultiIndex) + and is_list_like(level) + and len(level) == self.columns.nlevels + ): + return self._reduce_dimension( + query_compiler=self._query_compiler.stack(level, dropna) + ) + else: + return self.__constructor__( + query_compiler=self._query_compiler.stack(level, dropna) + ) + + def sub( + self, other, axis="columns", level=None, fill_value=None + ): # noqa: PR01, RT01, D200 + """ + Get subtraction of ``DataFrame`` and `other`, element-wise (binary operator `sub`). + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + return self._binary_op( + "sub", + other, + axis=axis, + level=level, + fill_value=fill_value, + ) + + subtract = sub + + def to_feather(self, path, **kwargs): # pragma: no cover # noqa: PR01, RT01, D200 + """ + Write a ``DataFrame`` to the binary Feather format. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + ErrorMessage.not_implemented() + return self._default_to_pandas(pandas.DataFrame.to_feather, path, **kwargs) + + def to_gbq( + self, + destination_table, + project_id=None, + chunksize=None, + reauth=False, + if_exists="fail", + auth_local_webserver=True, + table_schema=None, + location=None, + progress_bar=True, + credentials=None, + ): # pragma: no cover # noqa: PR01, RT01, D200 + """ + Write a ``DataFrame`` to a Google BigQuery table. 
+ """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functionsf + ErrorMessage.not_implemented() + return self._default_to_pandas( + pandas.DataFrame.to_gbq, + destination_table, + project_id=project_id, + chunksize=chunksize, + reauth=reauth, + if_exists=if_exists, + auth_local_webserver=auth_local_webserver, + table_schema=table_schema, + location=location, + progress_bar=progress_bar, + credentials=credentials, + ) + + def to_orc(self, path=None, *, engine="pyarrow", index=None, engine_kwargs=None): + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + ErrorMessage.not_implemented() + return self._default_to_pandas( + pandas.DataFrame.to_orc, + path=path, + engine=engine, + index=index, + engine_kwargs=engine_kwargs, + ) + + def to_html( + self, + buf=None, + columns=None, + col_space=None, + header=True, + index=True, + na_rep="NaN", + formatters=None, + float_format=None, + sparsify=None, + index_names=True, + justify=None, + max_rows=None, + max_cols=None, + show_dimensions=False, + decimal=".", + bold_rows=True, + classes=None, + escape=True, + notebook=False, + border=None, + table_id=None, + render_links=False, + encoding=None, + ): # noqa: PR01, RT01, D200 + """ + Render a ``DataFrame`` as an HTML table. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + ErrorMessage.not_implemented() + return self._default_to_pandas( + pandas.DataFrame.to_html, + buf=buf, + columns=columns, + col_space=col_space, + header=header, + index=index, + na_rep=na_rep, + formatters=formatters, + float_format=float_format, + sparsify=sparsify, + index_names=index_names, + justify=justify, + max_rows=max_rows, + max_cols=max_cols, + show_dimensions=show_dimensions, + decimal=decimal, + bold_rows=bold_rows, + classes=classes, + escape=escape, + notebook=notebook, + border=border, + table_id=table_id, + render_links=render_links, + encoding=None, + ) + + def to_parquet( + self, + path=None, + engine="auto", + compression="snappy", + index=None, + partition_cols=None, + storage_options: StorageOptions = None, + **kwargs, + ): + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + ErrorMessage.not_implemented() + from snowflake.snowpark.modin.pandas.dispatching.factories.dispatcher import ( + FactoryDispatcher, + ) + + return FactoryDispatcher.to_parquet( + self._query_compiler, + path=path, + engine=engine, + compression=compression, + index=index, + partition_cols=partition_cols, + storage_options=storage_options, + **kwargs, + ) + + def to_period( + self, freq=None, axis=0, copy=True + ): # pragma: no cover # noqa: PR01, RT01, D200 + """ + Convert ``DataFrame`` from ``DatetimeIndex`` to ``PeriodIndex``. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + ErrorMessage.not_implemented() + return super().to_period(freq=freq, axis=axis, copy=copy) + + def to_records( + self, index=True, column_dtypes=None, index_dtypes=None + ): # noqa: PR01, RT01, D200 + """ + Convert ``DataFrame`` to a NumPy record array. 
+ """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + ErrorMessage.not_implemented() + return self._default_to_pandas( + pandas.DataFrame.to_records, + index=index, + column_dtypes=column_dtypes, + index_dtypes=index_dtypes, + ) + + def to_stata( + self, + path: FilePath | WriteBuffer[bytes], + convert_dates: dict[Hashable, str] | None = None, + write_index: bool = True, + byteorder: str | None = None, + time_stamp: datetime.datetime | None = None, + data_label: str | None = None, + variable_labels: dict[Hashable, str] | None = None, + version: int | None = 114, + convert_strl: Sequence[Hashable] | None = None, + compression: CompressionOptions = "infer", + storage_options: StorageOptions = None, + *, + value_labels: dict[Hashable, dict[float | int, str]] | None = None, + ): + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + ErrorMessage.not_implemented() + return self._default_to_pandas( + pandas.DataFrame.to_stata, + path, + convert_dates=convert_dates, + write_index=write_index, + byteorder=byteorder, + time_stamp=time_stamp, + data_label=data_label, + variable_labels=variable_labels, + version=version, + convert_strl=convert_strl, + compression=compression, + storage_options=storage_options, + value_labels=value_labels, + ) + + def to_xml( + self, + path_or_buffer=None, + index=True, + root_name="data", + row_name="row", + na_rep=None, + attr_cols=None, + elem_cols=None, + namespaces=None, + prefix=None, + encoding="utf-8", + xml_declaration=True, + pretty_print=True, + parser="lxml", + stylesheet=None, + compression="infer", + storage_options=None, + ): + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + ErrorMessage.not_implemented() + return self.__constructor__( + query_compiler=self._query_compiler.default_to_pandas( + pandas.DataFrame.to_xml, + path_or_buffer=path_or_buffer, + index=index, + root_name=root_name, + row_name=row_name, + na_rep=na_rep, + attr_cols=attr_cols, + elem_cols=elem_cols, + namespaces=namespaces, + prefix=prefix, + encoding=encoding, + xml_declaration=xml_declaration, + pretty_print=pretty_print, + parser=parser, + stylesheet=stylesheet, + compression=compression, + storage_options=storage_options, + ) + ) + + def to_dict( + self, + orient: Literal[ + "dict", "list", "series", "split", "tight", "records", "index" + ] = "dict", + into: type[dict] = dict, + ) -> dict | list[dict]: + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + return self._to_pandas().to_dict(orient=orient, into=into) + + def to_timestamp( + self, freq=None, how="start", axis=0, copy=True + ): # noqa: PR01, RT01, D200 + """ + Cast to DatetimeIndex of timestamps, at *beginning* of period. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + ErrorMessage.not_implemented() + return super().to_timestamp(freq=freq, how=how, axis=axis, copy=copy) + + def truediv( + self, other, axis="columns", level=None, fill_value=None + ): # noqa: PR01, RT01, D200 + """ + Get floating division of ``DataFrame`` and `other`, element-wise (binary operator `truediv`). + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + return self._binary_op( + "truediv", + other, + axis=axis, + level=level, + fill_value=fill_value, + ) + + div = divide = truediv + + def update( + self, other, join="left", overwrite=True, filter_func=None, errors="ignore" + ): # noqa: PR01, RT01, D200 + """ + Modify in place using non-NA values from another ``DataFrame``. 
+ """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + if not isinstance(other, DataFrame): + other = self.__constructor__(other) + query_compiler = self._query_compiler.df_update( + other._query_compiler, + join=join, + overwrite=overwrite, + filter_func=filter_func, + errors=errors, + ) + self._update_inplace(new_query_compiler=query_compiler) + + def diff( + self, + periods: int = 1, + axis: Axis = 0, + ): + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + return super().diff( + periods=periods, + axis=axis, + ) + + def drop( + self, + labels: IndexLabel = None, + axis: Axis = 0, + index: IndexLabel = None, + columns: IndexLabel = None, + level: Level = None, + inplace: bool = False, + errors: IgnoreRaise = "raise", + ): + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + return super().drop( + labels=labels, + axis=axis, + index=index, + columns=columns, + level=level, + inplace=inplace, + errors=errors, + ) + + def value_counts( + self, + subset: Sequence[Hashable] | None = None, + normalize: bool = False, + sort: bool = True, + ascending: bool = False, + dropna: bool = True, + ): + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + return Series( + query_compiler=self._query_compiler.value_counts( + subset=subset, + normalize=normalize, + sort=sort, + ascending=ascending, + dropna=dropna, + ), + name="proportion" if normalize else "count", + ) + + def mask( + self, + cond: DataFrame | Series | Callable | AnyArrayLike, + other: DataFrame | Series | Callable | Scalar | None = np.nan, + *, + inplace: bool = False, + axis: Axis | None = None, + level: Level | None = None, + ): + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + if isinstance(other, Series) and axis is None: + raise ValueError( + "df.mask requires an axis parameter (0 or 1) when given a Series" + ) + + return super().mask( + cond, + other=other, + inplace=inplace, + axis=axis, + level=level, + ) + + def where( + self, + cond: DataFrame | Series | Callable | AnyArrayLike, + other: DataFrame | Series | Callable | Scalar | None = np.nan, + *, + inplace: bool = False, + axis: Axis | None = None, + level: Level | None = None, + ): + """ + Replace values where the condition is False. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + if isinstance(other, Series) and axis is None: + raise ValueError( + "df.where requires an axis parameter (0 or 1) when given a Series" + ) + + return super().where( + cond, + other=other, + inplace=inplace, + axis=axis, + level=level, + ) + + def xs(self, key, axis=0, level=None, drop_level=True): # noqa: PR01, RT01, D200 + """ + Return cross-section from the ``DataFrame``. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + ErrorMessage.not_implemented() + return self._default_to_pandas( + pandas.DataFrame.xs, key, axis=axis, level=level, drop_level=drop_level + ) + + def set_axis( + self, + labels: IndexLabel, + *, + axis: Axis = 0, + copy: bool | NoDefault = no_default, # ignored + ): + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + if not is_scalar(axis): + raise TypeError(f"{type(axis).__name__} is not a valid type for axis.") + return super().set_axis( + labels=labels, + # 'columns', 'rows, 'index, 0, and 1 are the only valid axis values for df. + axis=pandas.DataFrame._get_axis_name(axis), + copy=copy, + ) + + def __getattr__(self, key): + """ + Return item identified by `key`. 
+ + Parameters + ---------- + key : hashable + Key to get. + + Returns + ------- + Any + + Notes + ----- + First try to use `__getattribute__` method. If it fails + try to get `key` from ``DataFrame`` fields. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + try: + return object.__getattribute__(self, key) + except AttributeError as err: + if key not in _ATTRS_NO_LOOKUP and key in self.columns: + return self[key] + raise err + + def __setattr__(self, key, value): + """ + Set attribute `value` identified by `key`. + + Parameters + ---------- + key : hashable + Key to set. + value : Any + Value to set. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + # While we let users assign to a column labeled "x" with "df.x" , there + # are some attributes that we should assume are NOT column names and + # therefore should follow the default Python object assignment + # behavior. These are: + # - anything in self.__dict__. This includes any attributes that the + # user has added to the dataframe with, e.g., `df.c = 3`, and + # any attribute that Modin has added to the frame, e.g. + # `_query_compiler` and `_siblings` + # - `_query_compiler`, which Modin initializes before it appears in + # __dict__ + # - `_siblings`, which Modin initializes before it appears in __dict__ + # - `_cache`, which pandas.cache_readonly uses to cache properties + # before it appears in __dict__. + if key in ("_query_compiler", "_siblings", "_cache") or key in self.__dict__: + pass + elif key in self and key not in dir(self): + self.__setitem__(key, value) + # Note: return immediately so we don't keep this `key` as dataframe state. + # `__getattr__` will return the columns not present in `dir(self)`, so we do not need + # to manually track this state in the `dir`. + return + elif is_list_like(value) and key not in ["index", "columns"]: + WarningMessage.single_warning( + SET_DATAFRAME_ATTRIBUTE_WARNING + ) # pragma: no cover + object.__setattr__(self, key, value) + + def __setitem__(self, key: Any, value: Any): + """ + Set attribute `value` identified by `key`. + + Args: + key: Key to set + value: Value to set + + Note: + In the case where value is any list like or array, pandas checks the array length against the number of rows + of the input dataframe. If there is a mismatch, a ValueError is raised. Snowpark pandas indexing won't throw + a ValueError because knowing the length of the current dataframe can trigger eager evaluations; instead if + the array is longer than the number of rows we ignore the additional values. If the array is shorter, we use + enlargement filling with the last value in the array. + + Returns: + None + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + key = apply_if_callable(key, self) + if isinstance(key, DataFrame) or ( + isinstance(key, np.ndarray) and len(key.shape) == 2 + ): + # This case uses mask's codepath to perform the set, but + # we need to duplicate the code here since we are passing + # an additional kwarg `cond_fillna_with_true` to the QC here. + # We need this additional kwarg, since if df.shape + # and key.shape do not align (i.e. df has more rows), + # mask's codepath would mask the additional rows in df + # while for setitem, we need to keep the original values. 
+ if not isinstance(key, DataFrame): + if key.dtype != bool: + raise TypeError( + "Must pass DataFrame or 2-d ndarray with boolean values only" + ) + key = DataFrame(key) + key._query_compiler._shape_hint = "array" + + if value is not None: + value = apply_if_callable(value, self) + + if isinstance(value, np.ndarray): + value = DataFrame(value) + value._query_compiler._shape_hint = "array" + elif isinstance(value, pd.Series): + # pandas raises the `mask` ValueError here: Must specify axis = 0 or 1. We raise this + # error instead, since it is more descriptive. + raise ValueError( + "setitem with a 2D key does not support Series values." + ) + + if isinstance(value, BasePandasDataset): + value = value._query_compiler + + query_compiler = self._query_compiler.mask( + cond=key._query_compiler, + other=value, + axis=None, + level=None, + cond_fillna_with_true=True, + ) + + return self._create_or_update_from_compiler(query_compiler, inplace=True) + + # Error Checking: + if (isinstance(key, pd.Series) or is_list_like(key)) and ( + isinstance(value, range) + ): + raise NotImplementedError(DF_SETITEM_LIST_LIKE_KEY_AND_RANGE_LIKE_VALUE) + elif isinstance(value, slice): + # Here, the whole slice is assigned as a scalar variable, i.e., a spot at an index gets a slice value. + raise NotImplementedError(DF_SETITEM_SLICE_AS_SCALAR_VALUE) + + # Note: when key is a boolean indexer or slice the key is a row key; otherwise, the key is always a column + # key. + index, columns = slice(None), key + index_is_bool_indexer = False + if isinstance(key, slice): + if is_integer(key.start) and is_integer(key.stop): + # when slice are integer slice, e.g., df[1:2] = val, the behavior is the same as + # df.iloc[1:2, :] = val + self.iloc[key] = value + return + index, columns = key, slice(None) + elif isinstance(key, pd.Series): + if is_bool_dtype(key.dtype): + index, columns = key, slice(None) + index_is_bool_indexer = True + elif is_bool_indexer(key): + index, columns = pd.Series(key), slice(None) + index_is_bool_indexer = True + + # The reason we do not call loc directly is that setitem has different behavior compared to loc in this case + # we have to explicitly set matching_item_columns_by_label to False for setitem. + index = index._query_compiler if isinstance(index, BasePandasDataset) else index + columns = ( + columns._query_compiler + if isinstance(columns, BasePandasDataset) + else columns + ) + from .indexing import is_2d_array + + matching_item_rows_by_label = not is_2d_array(value) + if is_2d_array(value): + value = DataFrame(value) + item = value._query_compiler if isinstance(value, BasePandasDataset) else value + new_qc = self._query_compiler.set_2d_labels( + index, + columns, + item, + # setitem always matches item by position + matching_item_columns_by_label=False, + matching_item_rows_by_label=matching_item_rows_by_label, + index_is_bool_indexer=index_is_bool_indexer, + # setitem always deduplicates columns. E.g., if df has two columns "A" and "B", after calling + # df[["A","A"]] = item, df still only has two columns "A" and "B", and "A"'s values are set by the + # second "A" column from value; instead, if we call df.loc[:, ["A", "A"]] = item, then df will have + # three columns "A", "A", "B". Similarly, if we call df[["X","X"]] = item, df will have three columns + # "A", "B", "X", while if we call df.loc[:, ["X", "X"]] = item, then df will have four columns "A", "B", + # "X", "X". 
+ deduplicate_columns=True, + ) + return self._update_inplace(new_query_compiler=new_qc) + + def abs(self): + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + return super().abs() + + def __and__(self, other): + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + return self._binary_op("__and__", other, axis=1) + + def __rand__(self, other): + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + return self._binary_op("__rand__", other, axis=1) + + def __or__(self, other): + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + return self._binary_op("__or__", other, axis=1) + + def __ror__(self, other): + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + return self._binary_op("__ror__", other, axis=1) + + def __neg__(self): + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + return super().__neg__() + + def __iter__(self): + """ + Iterate over info axis. + + Returns + ------- + iterable + Iterator of the columns names. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + return iter(self.columns) + + def __contains__(self, key): + """ + Check if `key` in the ``DataFrame.columns``. + + Parameters + ---------- + key : hashable + Key to check the presence in the columns. + + Returns + ------- + bool + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + return self.columns.__contains__(key) + + def __round__(self, decimals=0): + """ + Round each value in a ``DataFrame`` to the given number of decimals. + + Parameters + ---------- + decimals : int, default: 0 + Number of decimal places to round to. + + Returns + ------- + DataFrame + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + return super().round(decimals) + + def __delitem__(self, key): + """ + Delete item identified by `key` label. + + Parameters + ---------- + key : hashable + Key to delete. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + ErrorMessage.not_implemented() # pragma: no cover + + if key not in self: + raise KeyError(key) + self._update_inplace(new_query_compiler=self._query_compiler.delitem(key)) + + __add__ = add + __iadd__ = add # pragma: no cover + __radd__ = radd + __mul__ = mul + __imul__ = mul # pragma: no cover + __rmul__ = rmul + __pow__ = pow + __ipow__ = pow # pragma: no cover + __rpow__ = rpow + __sub__ = sub + __isub__ = sub # pragma: no cover + __rsub__ = rsub + __floordiv__ = floordiv + __ifloordiv__ = floordiv # pragma: no cover + __rfloordiv__ = rfloordiv + __truediv__ = truediv + __itruediv__ = truediv # pragma: no cover + __rtruediv__ = rtruediv + __mod__ = mod + __imod__ = mod # pragma: no cover + __rmod__ = rmod + __rdiv__ = rdiv + + def __dataframe__(self, nan_as_null: bool = False, allow_copy: bool = True): + """ + Get a Modin DataFrame that implements the dataframe exchange protocol. + + See more about the protocol in https://data-apis.org/dataframe-protocol/latest/index.html. + + Parameters + ---------- + nan_as_null : bool, default: False + A keyword intended for the consumer to tell the producer + to overwrite null values in the data with ``NaN`` (or ``NaT``). + This currently has no effect; once support for nullable extension + dtypes is added, this value should be propagated to columns. + allow_copy : bool, default: True + A keyword that defines whether or not the library is allowed + to make a copy of the data. 
For example, copying data would be necessary + if a library supports strided buffers, given that this protocol + specifies contiguous buffers. Currently, if the flag is set to ``False`` + and a copy is needed, a ``RuntimeError`` will be raised. + + Returns + ------- + ProtocolDataframe + A dataframe object following the dataframe protocol specification. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + ErrorMessage.not_implemented() # pragma: no cover + + return self._query_compiler.to_dataframe( + nan_as_null=nan_as_null, allow_copy=allow_copy + ) + + @property + def attrs(self): # noqa: RT01, D200 + """ + Return dictionary of global attributes of this dataset. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + ErrorMessage.not_implemented() # pragma: no cover + + def attrs(df): + return df.attrs + + return self._default_to_pandas(attrs) + + @property + def style(self): # noqa: RT01, D200 + """ + Return a Styler object. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + ErrorMessage.not_implemented() + + def style(df): + """Define __name__ attr because properties do not have it.""" + return df.style + + return self._default_to_pandas(style) + + def isin( + self, values: ListLike | Series | DataFrame | dict[Hashable, ListLike] + ) -> DataFrame: + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + if isinstance(values, dict): + return super().isin(values) + elif isinstance(values, Series): + # Note: pandas performs explicit is_unique check here, deactivated for performance reasons. + # if not values.index.is_unique: + # raise ValueError("cannot compute isin with a duplicate axis.") + return self.__constructor__( + query_compiler=self._query_compiler.isin(values._query_compiler) + ) + elif isinstance(values, DataFrame): + # Note: pandas performs explicit is_unique check here, deactivated for performance reasons. + # if not (values.columns.is_unique and values.index.is_unique): + # raise ValueError("cannot compute isin with a duplicate axis.") + return self.__constructor__( + query_compiler=self._query_compiler.isin(values._query_compiler) + ) + else: + if not is_list_like(values): + # throw pandas compatible error + raise TypeError( + "only list-like or dict-like objects are allowed " + f"to be passed to {self.__class__.__name__}.isin(), " + f"you passed a '{type(values).__name__}'" + ) + return super().isin(values) + + def _create_or_update_from_compiler(self, new_query_compiler, inplace=False): + """ + Return or update a ``DataFrame`` with given `new_query_compiler`. + + Parameters + ---------- + new_query_compiler : PandasQueryCompiler + QueryCompiler to use to manage the data. + inplace : bool, default: False + Whether or not to perform update or creation inplace. + + Returns + ------- + DataFrame or None + None if update was done, ``DataFrame`` otherwise. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + assert ( + isinstance(new_query_compiler, type(self._query_compiler)) + or type(new_query_compiler) in self._query_compiler.__class__.__bases__ + ), f"Invalid Query Compiler object: {type(new_query_compiler)}" + if not inplace: + return self.__constructor__(query_compiler=new_query_compiler) + else: + self._update_inplace(new_query_compiler=new_query_compiler) + + def _get_numeric_data(self, axis: int): + """ + Grab only numeric data from ``DataFrame``. + + Parameters + ---------- + axis : {0, 1} + Axis to inspect on having numeric types only. 
+ + Returns + ------- + DataFrame + ``DataFrame`` with numeric data. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + # pandas ignores `numeric_only` if `axis` is 1, but we do have to drop + # non-numeric columns if `axis` is 0. + if axis != 0: + return self + return self.drop( + columns=[ + i for i in self.dtypes.index if not is_numeric_dtype(self.dtypes[i]) + ] + ) + + def _validate_dtypes(self, numeric_only=False): + """ + Check that all the dtypes are the same. + + Parameters + ---------- + numeric_only : bool, default: False + Whether or not to allow only numeric data. + If True and non-numeric data is found, exception + will be raised. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + dtype = self.dtypes[0] + for t in self.dtypes: + if numeric_only and not is_numeric_dtype(t): + raise TypeError(f"{t} is not a numeric data type") + elif not numeric_only and t != dtype: + raise TypeError(f"Cannot compare type '{t}' with type '{dtype}'") + + def _validate_dtypes_sum_prod_mean(self, axis, numeric_only, ignore_axis=False): + """ + Validate data dtype for `sum`, `prod` and `mean` methods. + + Parameters + ---------- + axis : {0, 1} + Axis to validate over. + numeric_only : bool + Whether or not to allow only numeric data. + If True and non-numeric data is found, exception + will be raised. + ignore_axis : bool, default: False + Whether or not to ignore `axis` parameter. + + Returns + ------- + DataFrame + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + # We cannot add datetime types, so if we are summing a column with + # dtype datetime64 and cannot ignore non-numeric types, we must throw a + # TypeError. + if ( + not axis + and numeric_only is False + and any(dtype == np.dtype("datetime64[ns]") for dtype in self.dtypes) + ): + raise TypeError("Cannot add Timestamp Types") + + # If our DataFrame has both numeric and non-numeric dtypes then + # operations between these types do not make sense and we must raise a + # TypeError. The exception to this rule is when there are datetime and + # timedelta objects, in which case we proceed with the comparison + # without ignoring any non-numeric types. We must check explicitly if + # numeric_only is False because if it is None, it will default to True + # if the operation fails with mixed dtypes. + if ( + (axis or ignore_axis) + and numeric_only is False + and np.unique([is_numeric_dtype(dtype) for dtype in self.dtypes]).size == 2 + ): + # check if there are columns with dtypes datetime or timedelta + if all( + dtype != np.dtype("datetime64[ns]") + and dtype != np.dtype("timedelta64[ns]") + for dtype in self.dtypes + ): + raise TypeError("Cannot operate on Numeric and Non-Numeric Types") + + return self._get_numeric_data(axis) if numeric_only else self + + def _to_pandas( + self, + *, + statement_params: dict[str, str] | None = None, + **kwargs: Any, + ) -> pandas.DataFrame: + """ + Convert Snowpark pandas DataFrame to pandas DataFrame + + Args: + statement_params: Dictionary of statement level parameters to be set while executing this action. + + Returns: + pandas DataFrame + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + return self._query_compiler.to_pandas( + statement_params=statement_params, **kwargs + ) + + def _validate_eval_query(self, expr, **kwargs): + """ + Validate the arguments of ``eval`` and ``query`` functions. + + Parameters + ---------- + expr : str + The expression to evaluate. 
This string cannot contain any + Python statements, only Python expressions. + **kwargs : dict + Optional arguments of ``eval`` and ``query`` functions. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + if isinstance(expr, str) and expr == "": + raise ValueError("expr cannot be an empty string") + + if isinstance(expr, str) and "not" in expr: + if "parser" in kwargs and kwargs["parser"] == "python": + ErrorMessage.not_implemented() # pragma: no cover + + def _reduce_dimension(self, query_compiler): + """ + Reduce the dimension of data from the `query_compiler`. + + Parameters + ---------- + query_compiler : BaseQueryCompiler + Query compiler to retrieve the data. + + Returns + ------- + Series + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + return Series(query_compiler=query_compiler) + + def _set_axis_name(self, name, axis=0, inplace=False): + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + axis = self._get_axis_number(axis) + renamed = self if inplace else self.copy() + if axis == 0: + renamed.index = renamed.index.set_names(name) + else: + renamed.columns = renamed.columns.set_names(name) + if not inplace: + return renamed + + def _to_datetime(self, **kwargs): + """ + Convert `self` to datetime. + + Parameters + ---------- + **kwargs : dict + Optional arguments to use during query compiler's + `to_datetime` invocation. + + Returns + ------- + Series of datetime64 dtype + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + return self._reduce_dimension( + query_compiler=self._query_compiler.dataframe_to_datetime(**kwargs) + ) + + # Persistance support methods - BEGIN + @classmethod + def _inflate_light(cls, query_compiler): + """ + Re-creates the object from previously-serialized lightweight representation. + + The method is used for faster but not disk-storable persistence. + + Parameters + ---------- + query_compiler : BaseQueryCompiler + Query compiler to use for object re-creation. + + Returns + ------- + DataFrame + New ``DataFrame`` based on the `query_compiler`. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + return cls(query_compiler=query_compiler) + + @classmethod + def _inflate_full(cls, pandas_df): + """ + Re-creates the object from previously-serialized disk-storable representation. + + Parameters + ---------- + pandas_df : pandas.DataFrame + Data to use for object re-creation. + + Returns + ------- + DataFrame + New ``DataFrame`` based on the `pandas_df`. + """ + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + return cls(data=from_pandas(pandas_df)) + + def __reduce__(self): + # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions + ErrorMessage.not_implemented() # pragma: no cover + + self._query_compiler.finalize() + # if PersistentPickle.get(): + # return self._inflate_full, (self._to_pandas(),) + return self._inflate_light, (self._query_compiler,) + + # Persistance support methods - END diff --git a/src/snowflake/snowpark/modin/pandas/general.py b/src/snowflake/snowpark/modin/pandas/general.py new file mode 100644 index 00000000000..d0ac54caeea --- /dev/null +++ b/src/snowflake/snowpark/modin/pandas/general.py @@ -0,0 +1,2387 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +# Licensed to Modin Development Team under one or more contributor license agreements. 
+# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +# Code in this file may constitute partial or total reimplementation, or modification of +# existing code originally distributed by the Modin project, under the Apache License, +# Version 2.0. + +"""Implement pandas general API.""" +from __future__ import annotations + +from collections.abc import Hashable, Iterable, Mapping, Sequence +from datetime import date, datetime, tzinfo +from logging import getLogger +from typing import TYPE_CHECKING, Any, Literal, Union + +import numpy as np +import pandas +import pandas.core.common as common +from pandas import IntervalIndex, NaT, Timestamp +from pandas._libs import NaTType, lib +from pandas._libs.tslibs import to_offset +from pandas._typing import ( + AnyArrayLike, + Axis, + DateTimeErrorChoices, + IndexLabel, + IntervalClosedType, + Scalar, + Suffixes, +) +from pandas.core.arrays import datetimelike +from pandas.core.arrays.datetimes import ( + _infer_tz_from_endpoints, + _maybe_normalize_endpoints, +) +from pandas.core.dtypes.common import is_list_like +from pandas.core.dtypes.inference import is_array_like +from pandas.core.tools.datetimes import ( + ArrayConvertible, + DatetimeScalar, + DatetimeScalarOrArrayConvertible, + DictConvertible, +) +from pandas.util._validators import validate_inclusive + +# add this line to make doctests runnable +from snowflake.snowpark.modin import pandas as pd # noqa: F401 +from snowflake.snowpark.modin.pandas.base import BasePandasDataset +from snowflake.snowpark.modin.pandas.dataframe import DataFrame +from snowflake.snowpark.modin.pandas.series import Series +from snowflake.snowpark.modin.pandas.utils import ( + is_scalar, + raise_if_native_pandas_objects, +) +from snowflake.snowpark.modin.plugin._internal.telemetry import ( + snowpark_pandas_telemetry_standalone_function_decorator, +) +from snowflake.snowpark.modin.plugin._internal.timestamp_utils import ( + VALID_TO_DATETIME_UNIT, +) +from snowflake.snowpark.modin.plugin._typing import ListLike, ListLikeOfFloats +from snowflake.snowpark.modin.plugin.compiler import BaseQueryCompiler +from snowflake.snowpark.modin.plugin.compiler.snowflake_query_compiler import ( + SnowflakeQueryCompiler, +) +from snowflake.snowpark.modin.plugin.utils.error_message import ErrorMessage +from snowflake.snowpark.modin.plugin.utils.warning_message import WarningMessage +from snowflake.snowpark.modin.utils import _inherit_docstrings, to_pandas + +if TYPE_CHECKING: + # To prevent cross-reference warnings when building documentation and prevent erroneously + # linking to `snowflake.snowpark.DataFrame`, we need to explicitly + # qualify return types in this file with `snowflake.snowpark.modin.pandas.DataFrame`. 
+ # SNOW-1233342: investigate how to fix these links without using absolute paths + import snowflake # pragma: no cover + +_logger = getLogger(__name__) + +VALID_DATE_TYPE = Union[ + np.integer, float, str, date, datetime, np.datetime64, pd.Timestamp +] + + +@snowpark_pandas_telemetry_standalone_function_decorator +@_inherit_docstrings(pandas.isna, apilink="pandas.isna") +def isna(obj): # noqa: PR01, RT01, D200 + """ + Detect missing values for an array-like object. + """ + # TODO: SNOW-1063345: Modin upgrade - modin.pandas functions in general.py + if isinstance(obj, BasePandasDataset): + return obj.isna() + else: + return pandas.isna(obj) + + +isnull = isna + + +@snowpark_pandas_telemetry_standalone_function_decorator +@_inherit_docstrings(pandas.notna, apilink="pandas.notna") +def notna(obj): # noqa: PR01, RT01, D200 + """ + Detect non-missing values for an array-like object. + """ + # TODO: SNOW-1063345: Modin upgrade - modin.pandas functions in general.py + if isinstance(obj, BasePandasDataset): + return obj.notna() + else: + return pandas.notna(obj) + + +notnull = notna + + +@snowpark_pandas_telemetry_standalone_function_decorator +def merge( + left: snowflake.snowpark.modin.pandas.DataFrame | Series, + right: snowflake.snowpark.modin.pandas.DataFrame | Series, + how: str | None = "inner", + on: IndexLabel | None = None, + left_on: None + | (Hashable | AnyArrayLike | Sequence[Hashable | AnyArrayLike]) = None, + right_on: None + | (Hashable | AnyArrayLike | Sequence[Hashable | AnyArrayLike]) = None, + left_index: bool | None = False, + right_index: bool | None = False, + sort: bool | None = False, + suffixes: Suffixes | None = ("_x", "_y"), + copy: bool | None = True, + indicator: bool | str | None = False, + validate: str | None = None, +): + """ + Merge DataFrame or named Series objects with a database-style join. + + A named Series object is treated as a DataFrame with a single named column. + + The join is done on columns or indexes. If joining columns on + columns, the DataFrame indexes *will be ignored*. Otherwise if joining indexes + on indexes or indexes on a column or columns, the index will be passed on. + When performing a cross merge, no column specifications to merge on are + allowed. + + .. warning:: + + If both key columns contain rows where the key is a null value, those + rows will be matched against each other. This is different from usual SQL + join behaviour and can lead to unexpected results. + + Parameters + ---------- + left : :class:`~snowflake.snowpark.modin.pandas.DataFrame` or named Series + right : :class:`~snowflake.snowpark.modin.pandas.DataFrame` or named Series + Object to merge with. + how : {'left', 'right', 'outer', 'inner', 'cross'}, default 'inner' + Type of merge to be performed. + + * left: use only keys from left frame, similar to a SQL left outer join; + preserve key order. + * right: use only keys from right frame, similar to a SQL right outer join; + preserve key order. + * outer: use union of keys from both frames, similar to a SQL full outer + join; sort keys lexicographically. + * inner: use intersection of keys from both frames, similar to a SQL inner + join; preserve the order of the left keys. + * cross: creates the cartesian product from both frames, preserves the order + of the left keys. + + on : label or list + Column or index level names to join on. These must be found in both + DataFrames. If `on` is None and not merging on indexes then this defaults + to the intersection of the columns in both DataFrames. 
+ left_on : label or list, or array-like + Column or index level names to join on in the left DataFrame. Can also + be an array or list of arrays of the length of the left DataFrame. + These arrays are treated as if they are columns. + right_on : label or list, or array-like + Column or index level names to join on in the right DataFrame. Can also + be an array or list of arrays of the length of the right DataFrame. + These arrays are treated as if they are columns. + left_index : bool, default False + Use the index from the left DataFrame as the join key(s). If it is a + MultiIndex, the number of keys in the other DataFrame (either the index + or a number of columns) must match the number of levels. + right_index : bool, default False + Use the index from the right DataFrame as the join key. Same caveats as + left_index. + sort : bool, default False + Sort the join keys lexicographically in the result DataFrame. If False, + the order of the join keys depends on the join type (how keyword). + suffixes : list-like, default is ("_x", "_y") + A length-2 sequence where each element is optionally a string + indicating the suffix to add to overlapping column names in + `left` and `right` respectively. Pass a value of `None` instead + of a string to indicate that the column name from `left` or + `right` should be left as-is, with no suffix. At least one of the + values must not be None. + copy : bool, default True + This argument is ignored in Snowpark pandas API. + indicator : bool or str, default False + If True, adds a column to the output DataFrame called "_merge" with + information on the source of each row. The column can be given a different + name by providing a string argument. The column will have a Categorical + type with the value of "left_only" for observations whose merge key only + appears in the left DataFrame, "right_only" for observations + whose merge key only appears in the right DataFrame, and "both" + if the observation's merge key is found in both DataFrames. + + validate : str, optional + This is not yet supported. + + Returns + ------- + :class:`~snowflake.snowpark.modin.pandas.DataFrame` + A DataFrame of the two merged objects. + + See Also + -------- + merge_ordered : Merge with optional filling/interpolation. + merge_asof : Merge on nearest keys. + DataFrame.join : Similar method using indices. + + Examples + -------- + >>> df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], + ... 'value': [1, 2, 3, 5]}) + >>> df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'], + ... 'value': [5, 6, 7, 8]}) + >>> df1 + lkey value + 0 foo 1 + 1 bar 2 + 2 baz 3 + 3 foo 5 + >>> df2 + rkey value + 0 foo 5 + 1 bar 6 + 2 baz 7 + 3 foo 8 + + Merge df1 and df2 on the lkey and rkey columns. The value columns have + the default suffixes, _x and _y, appended. + + >>> df1.merge(df2, left_on='lkey', right_on='rkey') + lkey value_x rkey value_y + 0 foo 1 foo 5 + 1 foo 1 foo 8 + 2 bar 2 bar 6 + 3 baz 3 baz 7 + 4 foo 5 foo 5 + 5 foo 5 foo 8 + + Merge DataFrames df1 and df2 with specified left and right suffixes + appended to any overlapping columns. + + >>> df1.merge(df2, left_on='lkey', right_on='rkey', + ... 
suffixes=('_left', '_right')) + lkey value_left rkey value_right + 0 foo 1 foo 5 + 1 foo 1 foo 8 + 2 bar 2 bar 6 + 3 baz 3 baz 7 + 4 foo 5 foo 5 + 5 foo 5 foo 8 + + + >>> df1 = pd.DataFrame({'a': ['foo', 'bar'], 'b': [1, 2]}) + >>> df2 = pd.DataFrame({'a': ['foo', 'baz'], 'c': [3, 4]}) + >>> df1 + a b + 0 foo 1 + 1 bar 2 + >>> df2 + a c + 0 foo 3 + 1 baz 4 + + >>> df1.merge(df2, how='inner', on='a') + a b c + 0 foo 1 3 + + >>> df1.merge(df2, how='left', on='a') + a b c + 0 foo 1 3.0 + 1 bar 2 NaN + + >>> df1 = pd.DataFrame({'left': ['foo', 'bar']}) + >>> df2 = pd.DataFrame({'right': [7, 8]}) + >>> df1 + left + 0 foo + 1 bar + >>> df2 + right + 0 7 + 1 8 + + >>> df1.merge(df2, how='cross') + left right + 0 foo 7 + 1 foo 8 + 2 bar 7 + 3 bar 8 + """ + # TODO: SNOW-1063345: Modin upgrade - modin.pandas functions in general.py + # Raise error if 'left' or 'right' is native pandas object. + raise_if_native_pandas_objects(left) + raise_if_native_pandas_objects(right) + + if isinstance(left, Series): + if left.name is None: + raise ValueError("Cannot merge a Series without a name") + else: + left = left.to_frame() + + if not isinstance(left, DataFrame): + raise TypeError( + f"Can only merge Series or DataFrame objects, a {type(left)} was passed" + ) + + return left.merge( + right, + how=how, + on=on, + left_on=left_on, + right_on=right_on, + left_index=left_index, + right_index=right_index, + sort=sort, + suffixes=suffixes, + copy=copy, + indicator=indicator, + validate=validate, + ) + + +@snowpark_pandas_telemetry_standalone_function_decorator +@_inherit_docstrings(pandas.merge_ordered, apilink="pandas.merge_ordered") +def merge_ordered( + left, + right, + on=None, + left_on=None, + right_on=None, + left_by=None, + right_by=None, + fill_method=None, + suffixes=("_x", "_y"), + how: str = "outer", +) -> DataFrame: # noqa: PR01, RT01, D200 + """ + Perform a merge for ordered data with optional filling/interpolation. + """ + # TODO: SNOW-1063345: Modin upgrade - modin.pandas functions in general.py + ErrorMessage.not_implemented() + + if not isinstance(left, DataFrame): + raise ValueError(f"can not merge DataFrame with instance of type {type(right)}") + if isinstance(right, DataFrame): + right = to_pandas(right) + return DataFrame( + pandas.merge_ordered( + to_pandas(left), + right, + on=on, + left_on=left_on, + right_on=right_on, + left_by=left_by, + right_by=right_by, + fill_method=fill_method, + suffixes=suffixes, + how=how, + ) + ) + + +@snowpark_pandas_telemetry_standalone_function_decorator +@_inherit_docstrings(pandas.merge_asof, apilink="pandas.merge_asof") +def merge_asof( + left, + right, + on=None, + left_on=None, + right_on=None, + left_index: bool = False, + right_index: bool = False, + by=None, + left_by=None, + right_by=None, + suffixes=("_x", "_y"), + tolerance=None, + allow_exact_matches: bool = True, + direction: str = "backward", +) -> DataFrame: # noqa: PR01, RT01, D200 + """ + Perform a merge by key distance. 
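+
+    Snowpark pandas does not yet implement ``merge_asof``, so the example below is
+    only an illustrative sketch of native pandas ``merge_asof`` semantics (the frame
+    and column names are arbitrary); it is skipped at doctest time.
+
+    Examples
+    --------
+    >>> left = pd.DataFrame({'a': [1, 5, 10], 'left_val': ['a', 'b', 'c']})
+    >>> right = pd.DataFrame({'a': [1, 2, 3, 6, 7], 'right_val': [1, 2, 3, 6, 7]})
+    >>> pd.merge_asof(left, right, on='a')  # doctest: +SKIP
+    a left_val right_val
+    0 1 a 1
+    1 5 b 3
+    2 10 c 7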
+ """ + # TODO: SNOW-1063345: Modin upgrade - modin.pandas functions in general.py + ErrorMessage.not_implemented() + + if not isinstance(left, DataFrame): + raise ValueError(f"can not merge DataFrame with instance of type {type(right)}") + + # As of pandas 1.2 these should raise an error; before that it did + # something likely random: + if ( + (on and (left_index or right_index)) + or (left_on and left_index) + or (right_on and right_index) + ): + raise ValueError("Can't combine left/right_index with left/right_on or on.") + + if on is not None: + if left_on is not None or right_on is not None: + raise ValueError("If 'on' is set, 'left_on' and 'right_on' can't be set.") + left_on = on + right_on = on + + if by is not None: + if left_by is not None or right_by is not None: + raise ValueError("Can't have both 'by' and 'left_by' or 'right_by'") + left_by = right_by = by + + if left_on is None and not left_index: + raise ValueError("Must pass on, left_on, or left_index=True") + + if right_on is None and not right_index: + raise ValueError("Must pass on, right_on, or right_index=True") + + return DataFrame( + query_compiler=left._query_compiler.merge_asof( + right._query_compiler, + left_on, + right_on, + left_index, + right_index, + left_by, + right_by, + suffixes, + tolerance, + allow_exact_matches, + direction, + ) + ) + + +@snowpark_pandas_telemetry_standalone_function_decorator +def pivot_table( + data, + values=None, + index=None, + columns=None, + aggfunc="mean", + fill_value=None, + margins=False, + dropna=True, + margins_name="All", + observed=False, + sort=True, +): + """ + Create a spreadsheet-style pivot table as a ``DataFrame``. + + The levels in the pivot table will be stored in MultiIndex objects + (hierarchical indexes) on the index and columns of the result DataFrame. + + Parameters + ---------- + values : list-like or scalar, optional + Column or columns to aggregate. + index : column, Grouper, array, or list of the previous + Keys to group by on the pivot table index. If a list is passed, + it can contain any of the other types (except list). If an array is + passed, it must be the same length as the data and will be used in + the same manner as column values. + columns : column, Grouper, array, or list of the previous + Keys to group by on the pivot table column. If a list is passed, + it can contain any of the other types (except list). If an array is + passed, it must be the same length as the data and will be used in + the same manner as column values. + aggfunc : function, list of functions, dict in string, default "mean". + If a list of functions is passed, the resulting pivot table will have + hierarchical columns whose top level are the function names + (inferred from the function objects themselves). + If a dict is passed, the key is column to aggregate and the value is + function or list of functions. If ``margin=True``, aggfunc will be + used to calculate the partial aggregates. + fill_value : scalar, default None + Value to replace missing values with (in the resulting pivot table, + after aggregation). + margins : bool, default False + If ``margins=True``, special ``All`` columns and rows + will be added with partial group aggregates across the categories + on the rows and columns. + dropna : bool, default True + Do not include columns whose entries are all NaN. If True, + rows with a NaN value in any column will be omitted before + computing margins. 
+ margins_name : str, default 'All' + Name of the row / column that will contain the totals + when margins is True. + observed : bool, default False + This only applies if any of the groupers are Categoricals. + Categoricals are not yet implemented in Snowpark pandas. + If True: only show observed values for categorical groupers. + If False: show all values for categorical groupers. + + sort : bool, default True + Specifies if the result should be sorted. + + Returns + ------- + Snowpark pandas :class:`~snowflake.snowpark.modin.pandas.DataFrame` + An Excel style pivot table. + + Notes + ----- + Raise NotImplementedError if + + * margins, observed, or sort is given; + * or index, columns, or values is not str; + * or DataFrame contains MultiIndex; + * or any argfunc is not "count", "mean", "min", "max", or "sum" + + See Also + -------- + DataFrame.pivot : Pivot without aggregation that can handle + non-numeric data. + DataFrame.melt: Unpivot a DataFrame from wide to long format, + optionally leaving identifiers set. + wide_to_long : Wide panel to long format. Less flexible but more + user-friendly than melt. + + Examples + -------- + >>> df = pd.DataFrame({"A": ["foo", "foo", "foo", "foo", "foo", + ... "bar", "bar", "bar", "bar"], + ... "B": ["one", "one", "one", "two", "two", + ... "one", "one", "two", "two"], + ... "C": ["small", "large", "large", "small", + ... "small", "large", "small", "small", + ... "large"], + ... "D": [1, 2, 2, 3, 3, 4, 5, 6, 7], + ... "E": [2, 4, 5, 5, 6, 6, 8, 9, 9]}) + >>> df + A B C D E + 0 foo one small 1 2 + 1 foo one large 2 4 + 2 foo one large 2 5 + 3 foo two small 3 5 + 4 foo two small 3 6 + 5 bar one large 4 6 + 6 bar one small 5 8 + 7 bar two small 6 9 + 8 bar two large 7 9 + + This first example aggregates values by taking the sum. + + >>> table = pd.pivot_table(df, values='D', index=['A', 'B'], + ... columns=['C'], aggfunc="sum") + >>> table # doctest: +NORMALIZE_WHITESPACE + C large small + A B + bar one 4.0 5 + two 7.0 6 + foo one 4.0 1 + two NaN 6 + + We can also fill missing values using the `fill_value` parameter. + + >>> table = pd.pivot_table(df, values='D', index=['A', 'B'], + ... columns=['C'], aggfunc="sum", fill_value=0) + >>> table # doctest: +NORMALIZE_WHITESPACE + C large small + A B + bar one 4.0 5 + two 7.0 6 + foo one 4.0 1 + two NaN 6 + + >>> table = pd.pivot_table(df, values=['D', 'E'], index=['A', 'C'], + ... aggfunc={'D': "mean", 'E': "mean"}) + >>> table # doctest: +NORMALIZE_WHITESPACE + D E + D E + A C + bar large 5.500000 7.500000 + small 5.500000 8.500000 + foo large 2.000000 4.500000 + small 2.333333 4.333333 + + >>> table = pd.pivot_table(df, values=['D', 'E'], index=['A', 'C'], + ... aggfunc={'D': "mean", + ... 
'E': ["min", "max", "mean"]}) + >>> table # doctest: +NORMALIZE_WHITESPACE + D E + mean max mean min + D E E E + A C + bar large 5.500000 9 7.500000 6 + small 5.500000 9 8.500000 8 + foo large 2.000000 5 4.500000 4 + small 2.333333 6 4.333333 2 + """ + # TODO: SNOW-1063345: Modin upgrade - modin.pandas functions in general.py + if not isinstance(data, DataFrame): + raise ValueError( + f"can not create pivot table with instance of type {type(data)}" + ) + + return data.pivot_table( + values=values, + index=index, + columns=columns, + aggfunc=aggfunc, + fill_value=fill_value, + margins=margins, + dropna=dropna, + margins_name=margins_name, + sort=sort, + ) + + +@snowpark_pandas_telemetry_standalone_function_decorator +@_inherit_docstrings(pandas.pivot, apilink="pandas.pivot") +def pivot(data, index=None, columns=None, values=None): # noqa: PR01, RT01, D200 + """ + Return reshaped DataFrame organized by given index / column values. + """ + # TODO: SNOW-1063345: Modin upgrade - modin.pandas functions in general.py + ErrorMessage.not_implemented() + + if not isinstance(data, DataFrame): + raise ValueError(f"can not pivot with instance of type {type(data)}") + return data.pivot(index=index, columns=columns, values=values) + + +@snowpark_pandas_telemetry_standalone_function_decorator +def to_numeric( + arg: Scalar | Series | ArrayConvertible, + errors: Literal["ignore", "raise", "coerce"] = "raise", + downcast: Literal["integer", "signed", "unsigned", "float"] | None = None, +) -> Series | Scalar | None: + # TODO: SNOW-1336091: Snowpark pandas cannot run in sprocs until modin 0.28.1 is available in conda + """ + Convert argument to a numeric type. + + If the input arg type is already a numeric type, the return dtype + will be the original type; otherwise, the return dtype is float. + + Parameters + ---------- + arg : scalar, list, tuple, 1-d array, or Series + Argument to be converted. + errors : {'ignore', 'raise', 'coerce'}, default 'raise' + - If 'raise', then invalid parsing will raise an exception. + - If 'coerce', then invalid parsing will be set as NaN. + - If 'ignore', then invalid parsing will return the input. + downcast : str, default None + downcast is ignored in Snowflake backend. + + Returns + ------- + ret + Numeric if parsing succeeded. + Return type depends on input. Series if `arg` is not scalar. + + See Also + -------- + DataFrame.astype : Cast argument to a specified dtype. + to_datetime : Convert argument to datetime. + to_timedelta : Convert argument to timedelta. + numpy.ndarray.astype : Cast a numpy array to a specified type. + DataFrame.convert_dtypes : Convert dtypes. 
+ + Examples + -------- + Take separate series and convert to numeric, coercing when told to + + >>> s = pd.Series(['1.0', '2', -3]) + >>> pd.to_numeric(s) + 0 1.0 + 1 2.0 + 2 -3.0 + dtype: float64 + + Note: to_numeric always converts non-numeric values to floats + >>> s = pd.Series(['1', '2', '-3']) + >>> pd.to_numeric(s) + 0 1.0 + 1 2.0 + 2 -3.0 + dtype: float64 + >>> pd.to_numeric(s, downcast='float') # downcast is ignored + 0 1.0 + 1 2.0 + 2 -3.0 + dtype: float64 + >>> pd.to_numeric(s, downcast='signed') # downcast is ignored + 0 1.0 + 1 2.0 + 2 -3.0 + dtype: float64 + >>> s = pd.Series(['apple', '1.0', '2', -3]) + >>> pd.to_numeric(s, errors='ignore') # doctest: +SKIP + 0 apple + 1 1.0 + 2 2 + 3 -3 + dtype: object + >>> pd.to_numeric(s, errors='coerce') + 0 NaN + 1 1.0 + 2 2.0 + 3 -3.0 + dtype: float64 + """ + # TODO: SNOW-1063345: Modin upgrade - modin.pandas functions in general.py + raise_if_native_pandas_objects(arg) + if errors not in ("ignore", "raise", "coerce"): + raise ValueError("invalid error value specified") + if downcast is not None: + WarningMessage.ignored_argument( + operation="to_numeric", + argument="downcast", + message="downcast is ignored in Snowflake backend", + ) + # convert arg to series + arg_is_scalar = is_scalar(arg) + + if ( + not arg_is_scalar + and not isinstance(arg, (list, tuple, Series)) + and not (is_array_like(arg) and arg.ndim == 1) + ): + raise TypeError("arg must be a list, tuple, 1-d array, or Series") + + if arg_is_scalar: + arg = Series([arg]) + + if not isinstance(arg, Series): + name = None + # keep index name + if isinstance(arg, pandas.Index): + name = arg.name + arg = Series(arg, name=name) + + ret = arg._to_numeric(errors=errors) + if arg_is_scalar: + # use squeeze to turn the series result into a scalar + ret = ret.squeeze() + return ret + + +@snowpark_pandas_telemetry_standalone_function_decorator +def unique(values) -> np.ndarray: + """ + Return unique values based on a hash table. Unique values are + returned in the order of appearance. This does NOT sort. + + Parameters + ---------- + values : ndarray (1-d), list, bytearray, tuple, Series, Index, list-like + Non-hashable objects like set, dict, and user defined classes are + invalid input. + Values to perform computation. + + Returns + ------- + ndarray + The unique values returned as a NumPy array. See Notes. + + See Also + -------- + Series.unique() + + Notes + ----- + Returns the unique values as a NumPy array. This includes + + * Datetime with Timezone + * IntegerNA + + See Examples section. + + Examples + -------- + >>> pd.unique([2, 1, 3, 3]) + array([2, 1, 3]) + + >>> pd.unique([pd.Timestamp('2016-01-01', tz='US/Eastern') + ... for _ in range(3)]) + array([Timestamp('2015-12-31 21:00:00-0800', tz='America/Los_Angeles')], + dtype=object) + + >>> pd.unique([("a", "b"), ("b", "a"), ("a", "c"), ("b", "a")]) + array([list(['a', 'b']), list(['b', 'a']), list(['a', 'c'])], dtype=object) + + >>> pd.unique([None, np.nan, 2]) + array([nan, 2.]) + """ + # TODO: SNOW-1063345: Modin upgrade - modin.pandas functions in general.py + if is_list_like(values) and not isinstance(values, dict): + return Series(values).unique() + else: + raise TypeError("Only list-like objects can be used with unique()") + + +# Adding docstring since pandas docs don't have web section for this function. 
+@snowpark_pandas_telemetry_standalone_function_decorator +def value_counts( + values, sort=True, ascending=False, normalize=False, bins=None, dropna=True +): + """ + Compute a histogram of the counts of non-null values. + + Parameters + ---------- + values : ndarray (1-d) + Values to perform computation. + sort : bool, default: True + Sort by values. + ascending : bool, default: False + Sort in ascending order. + normalize : bool, default: False + If True then compute a relative histogram. + bins : integer, optional + Rather than count values, group them into half-open bins, + convenience for pd.cut, only works with numeric data. + dropna : bool, default: True + Don't include counts of NaN. + + Returns + ------- + Series + """ + # TODO: SNOW-1063345: Modin upgrade - modin.pandas functions in general.py + ErrorMessage.not_implemented() + + return Series(values).value_counts( + sort=sort, + ascending=ascending, + normalize=normalize, + bins=bins, + dropna=dropna, + ) + + +@snowpark_pandas_telemetry_standalone_function_decorator +def concat( + objs: ( + Iterable[snowflake.snowpark.modin.pandas.DataFrame | Series] + | Mapping[Hashable, snowflake.snowpark.modin.pandas.DataFrame | Series] + ), + axis: Axis = 0, + join: str = "outer", + ignore_index: bool = False, + keys: Sequence[Hashable] = None, + levels: list[Sequence[Hashable]] = None, + names: list[Hashable] = None, + verify_integrity: bool = False, + sort: bool = False, + copy: bool = True, +) -> snowflake.snowpark.modin.pandas.DataFrame | Series: + """ + Concatenate pandas objects along a particular axis. + + Allows optional set logic along the other axes. + + Can also add a layer of hierarchical indexing on the concatenation axis, + which may be useful if the labels are the same (or overlapping) on + the passed axis number. + + Parameters + ---------- + objs : a sequence or mapping of Series or DataFrame objects + If a mapping is passed, the sorted keys will be used as the `keys` + argument, unless it is passed, in which case the values will be + selected (see below). Any None objects will be dropped silently unless + they are all None in which case a ValueError will be raised. + axis : {0/'index', 1/'columns'}, default 0 + The axis to concatenate along. + join : {'inner', 'outer'}, default 'outer' + How to handle indexes on other axis (or axes). + ignore_index : bool, default False + If True, do not use the index values along the concatenation axis. The + resulting axis will be labeled 0, ..., n - 1. This is useful if you are + concatenating objects where the concatenation axis does not have + meaningful indexing information. Note the index values on the other + axes are still respected in the join. + keys : sequence, default None + If multiple levels passed, should contain tuples. Construct + hierarchical index using the passed keys as the outermost level. + levels : list of sequences, default None + Specific levels (unique values) to use for constructing a + MultiIndex. Otherwise they will be inferred from the keys. + Snowpark pandas does not support 'levels' argument. + names : list, default None + Names for the levels in the resulting hierarchical index. + verify_integrity : bool, default False + Check whether the new concatenated axis contains duplicates. + Snowpark pandas does not support distributed computation of concat when + 'verify_integrity' is True. + sort : bool, default False + Sort non-concatenation axis if it is not already aligned. + copy : bool, default True + If False, do not copy data unnecessarily. 
+ This argument is ignored in Snowpark pandas. + + Returns + ------- + object, type of objs + When concatenating all Snowpark pandas :class:`~snowflake.snowpark.modin.pandas.Series` along the index (axis=0), + a Snowpark pandas :class:`~snowflake.snowpark.modin.pandas.Series` is returned. When ``objs`` contains at least + one Snowpark pandas :class:`~snowflake.snowpark.modin.pandas.DataFrame`, + a Snowpark pandas :class:`~snowflake.snowpark.modin.pandas.DataFrame` is returned. When concatenating along + the columns (axis=1), a Snowpark pandas :class:`~snowflake.snowpark.modin.pandas.DataFrame` is returned. + + See Also + -------- + DataFrame.join : Join DataFrames using indexes. + DataFrame.merge : Merge DataFrames by indexes or columns. + + Notes + ----- + The keys, levels, and names arguments are all optional. + + It is not recommended to build DataFrames by adding single rows in a + for loop. Build a list of rows and make a DataFrame in a single concat. + + Examples + -------- + Combine two ``Series``. + + >>> s1 = pd.Series(['a', 'b']) + >>> s2 = pd.Series(['c', 'd']) + >>> pd.concat([s1, s2]) + 0 a + 1 b + 0 c + 1 d + dtype: object + + Clear the existing index and reset it in the result + by setting the ``ignore_index`` option to ``True``. + + >>> pd.concat([s1, s2], ignore_index=True) + 0 a + 1 b + 2 c + 3 d + dtype: object + + Add a hierarchical index at the outermost level of + the data with the ``keys`` option. + + >>> pd.concat([s1, s2], keys=['s1', 's2']) + s1 0 a + 1 b + s2 0 c + 1 d + dtype: object + + Label the index keys you create with the ``names`` option. + + >>> pd.concat([s1, s2], keys=['s1', 's2'], + ... names=['Series name', 'Row ID']) + Series name Row ID + s1 0 a + 1 b + s2 0 c + 1 d + dtype: object + + Combine two ``DataFrame`` objects with identical columns. + + >>> df1 = pd.DataFrame([['a', 1], ['b', 2]], + ... columns=['letter', 'number']) + >>> df1 + letter number + 0 a 1 + 1 b 2 + >>> df2 = pd.DataFrame([['c', 3], ['d', 4]], + ... columns=['letter', 'number']) + >>> df2 + letter number + 0 c 3 + 1 d 4 + >>> pd.concat([df1, df2]) + letter number + 0 a 1 + 1 b 2 + 0 c 3 + 1 d 4 + + Combine ``DataFrame`` objects with overlapping columns + and return everything. Columns outside the intersection will + be filled with ``NaN`` values. + + >>> df3 = pd.DataFrame([['c', 3, 'cat'], ['d', 4, 'dog']], + ... columns=['letter', 'number', 'animal']) + >>> df3 + letter number animal + 0 c 3 cat + 1 d 4 dog + >>> pd.concat([df1, df3], sort=False) + letter number animal + 0 a 1 None + 1 b 2 None + 0 c 3 cat + 1 d 4 dog + + Combine ``DataFrame`` objects with overlapping columns + and return only those that are shared by passing ``inner`` to + the ``join`` keyword argument. + + >>> pd.concat([df1, df3], join="inner") + letter number + 0 a 1 + 1 b 2 + 0 c 3 + 1 d 4 + + Combine ``DataFrame`` objects horizontally along the x axis by + passing in ``axis=1``. + + >>> df4 = pd.DataFrame([['bird', 'polly'], ['monkey', 'george']], + ... columns=['animal', 'name']) + >>> pd.concat([df1, df4], axis=1) + letter number animal name + 0 a 1 bird polly + 1 b 2 monkey george + + Combining series horizontally creates a DataFrame. Missing names are replaced with + numeric values. + + >>> pd.concat([s1, s2], axis=1) + 0 1 + 0 a c + 1 b d + + When combining objects horizoantally ``ignore_index=True`` will clear the existing + column names and reset it in the result. 
+
+ >>> pd.concat([df1, df4], axis=1, ignore_index=True)
+ 0 1 2 3
+ 0 a 1 bird polly
+ 1 b 2 monkey george
+
+ When combining objects horizontally, add a hierarchical column index at the
+ outermost level of the column labels with the ``keys`` option.
+
+ >>> pd.concat([df1, df4], axis=1, keys=['x', 'y']) # doctest: +NORMALIZE_WHITESPACE
+ x y
+ letter number animal name
+ 0 a 1 bird polly
+ 1 b 2 monkey george
+
+ Concatenating series horizontally with ``keys``.
+
+ >>> pd.concat([s1, s2], axis=1, keys=['x', 'y'])
+ x y
+ 0 1
+ 0 a c
+ 1 b d
+
+ When combining objects horizontally, use ``join='inner'`` to keep only overlapping
+ index values.
+
+ >>> df5 = pd.DataFrame([['a', 1], ['b', 2]],
+ ... columns=['letter', 'number'],
+ ... index=[1, 2])
+ >>> df5
+ letter number
+ 1 a 1
+ 2 b 2
+ >>> pd.concat([df1, df5], axis=1, join='inner')
+ letter number letter number
+ 1 b 2 a 1
+
+ Prevent the result from including duplicate index values with the
+ ``verify_integrity`` option.
+
+ >>> df5 = pd.DataFrame([1], index=['a'])
+ >>> df5
+ 0
+ a 1
+ >>> df6 = pd.DataFrame([2], index=['a'])
+ >>> df6
+ 0
+ a 2
+ >>> pd.concat([df5, df6], verify_integrity=True)
+ Traceback (most recent call last):
+ ...
+ ValueError: Indexes have overlapping values: Index(['a'], dtype='object')
+
+ Append a single row to the end of a ``DataFrame`` object.
+
+ >>> df7 = pd.DataFrame({'a': 1, 'b': 2}, index=[0])
+ >>> df7
+ a b
+ 0 1 2
+ >>> new_row = pd.DataFrame({'a': 3, 'b': 4}, index=[0])
+ >>> new_row
+ a b
+ 0 3 4
+ >>> pd.concat([df7, new_row], ignore_index=True)
+ a b
+ 0 1 2
+ 1 3 4
+ """
+ # TODO: SNOW-1063345: Modin upgrade - modin.pandas functions in general.py
+ # Raise error if native pandas objects are passed.
+ raise_if_native_pandas_objects(objs)
+
+ # In native pandas, the 'concat' API is expected to work with all types of iterables like
+ # tuples, lists, generators, custom iterators, deques, etc.
+ # A few exceptions are 'DataFrame', 'Series' and 'str'; these are also technically
+ # iterables, but they are not iterables of pandas objects.
+ # Note that other iterables can also contain non-pandas objects as elements, but it's
+ # not possible to know that in advance without iterating over all objects, so we
+ # also validate each individual element later.
+
+ # Raise error if 'objs' is not an iterable or an iterable of non-pandas objects.
+ if not isinstance(objs, Iterable) or isinstance(
+ objs, (pd.DataFrame, pd.Series, str)
+ ):
+ # Same error as native pandas.
+ raise TypeError(
+ "first argument must be an iterable of pandas "
+ f'objects, you passed an object of type "{type(objs).__name__}"'
+ )
+
+ if isinstance(objs, dict):
+ if keys is None:
+ keys = list(objs.keys())
+ # if 'keys' is not None, filter out additional objects from the mapping.
+ objs = [objs[k] for k in keys]
+ else:
+ # Native pandas also supports generators as input, which can only be iterated
+ # once, so first create a list from 'objs'.
+ objs = list(objs)
+
+ for obj in objs:
+ # Raise error if native pandas objects are passed.
+ raise_if_native_pandas_objects(obj)
+
+ if join not in ("inner", "outer"):
+ # Same error as native pandas.
+ raise ValueError(
+ "Only can inner (intersect) or outer (union) join the other axis"
+ )
+
+ axis = pandas.DataFrame()._get_axis_number(axis)
+
+ if len(objs) == 0:
+ # Same error as native pandas.
+ raise ValueError("No objects to concatenate") + + # Filter out None objects + if keys is None: + objs = [o for o in objs if o is not None] + else: + tuples = [(k, v) for k, v in zip(keys, objs) if v is not None] + # convert list of tuples to tuples of list. + keys, objs = list(map(list, zip(*tuples))) if tuples else ([], []) + + if len(objs) == 0: + # Same error as native pandas. + raise ValueError("All objects passed were None") + + for obj in objs: + # Same error as native pandas. + if not isinstance(obj, (Series, DataFrame)): + raise TypeError( + f"cannot concatenate object of type '{type(obj)}'; " + "only Series and DataFrame objs are valid" + ) + + # Assign names to unnamed series + series_name = 0 + for i, obj in enumerate(objs): + if isinstance(obj, pd.Series) and obj.name is None: + objs[i] = obj.rename(series_name) + series_name = series_name + 1 + + # Check if all objects are of Series types. + all_series = all([isinstance(obj, pd.Series) for obj in objs]) + # When concatenating Series objects on axis 0, pandas tries to preserve name from + # input if all have same name otherwise set it to None. + if all_series and axis == 0: + unique_names = {obj.name for obj in objs} + name = objs[0].name if len(unique_names) == 1 else None + objs = [obj.rename(name) for obj in objs] + + if not copy: + WarningMessage.ignored_argument( + operation="concat", + argument="copy", + message="copy parameter has been ignored with Snowflake execution engine", + ) + + result = objs[0]._query_compiler.concat( + axis, + [o._query_compiler for o in objs[1:]], + join=join, + ignore_index=ignore_index, + keys=keys, + levels=levels, + names=names, + verify_integrity=verify_integrity, + sort=sort, + ) + # If all objects are series and concat axis=0, return Series else return DataFrame. + if all_series and axis == 0: + return Series(query_compiler=result) + return DataFrame(query_compiler=result) + + +@snowpark_pandas_telemetry_standalone_function_decorator +def to_datetime( + arg: DatetimeScalarOrArrayConvertible + | DictConvertible + | snowflake.snowpark.modin.pandas.DataFrame + | Series, + errors: DateTimeErrorChoices = "raise", + dayfirst: bool = False, + yearfirst: bool = False, + utc: bool = False, + format: str | None = None, + exact: bool | lib.NoDefault = lib.no_default, + unit: str | None = None, + infer_datetime_format: lib.NoDefault | bool = lib.no_default, + origin: Any = "unix", + cache: bool = True, +) -> Series | DatetimeScalar | NaTType | None: + # TODO: SNOW-1336091: Snowpark pandas cannot run in sprocs until modin 0.28.1 is available in conda + """ + Convert argument to datetime. + + This function converts a scalar, array-like, :class:`~snowflake.snowpark.modin.pandas.Series` or + :class:`~snowflake.snowpark.modin.pandas.DataFrame`/dict-like to a pandas datetime object. + + Parameters + ---------- + arg : int, float, str, datetime, list, tuple, 1-d array, Series, :class:`~snowflake.snowpark.modin.pandas.DataFrame`/dict-like + The object to convert to a datetime. If a :class:`~snowflake.snowpark.modin.pandas.DataFrame` is provided, the + method expects minimally the following columns: :const:`"year"`, + :const:`"month"`, :const:`"day"`. + errors : {'ignore', 'raise', 'coerce'}, default 'raise' + - If :const:`'raise'`, then invalid parsing will raise an exception. + - If :const:`'coerce'`, then invalid parsing will be set as :const:`NaT`. + - If :const:`'ignore'`, then invalid parsing will return the input. 
+ dayfirst : bool, default False + Specify a date parse order if `arg` is str or is list-like. + If :const:`True`, parses dates with the day first, e.g. :const:`"10/11/12"` + is parsed as :const:`2012-11-10`. + + .. warning:: + + ``dayfirst=True`` is not strict, but will prefer to parse + with day first. If a delimited date string cannot be parsed in + accordance with the given `dayfirst` option, e.g. + ``to_datetime(['31-12-2021'])``, then a warning will be shown. + + yearfirst : bool, default False + Specify a date parse order if `arg` is str or is list-like. + + - If :const:`True` parses dates with the year first, e.g. + :const:`"10/11/12"` is parsed as :const:`2010-11-12`. + - If both `dayfirst` and `yearfirst` are :const:`True`, `yearfirst` is + preceded (same as :mod:`dateutil`). + + .. warning:: + + ``yearfirst=True`` is not strict, but will prefer to parse + with year first. + + utc : bool, default None + Control timezone-related parsing, localization and conversion. + + - If :const:`True`, the function *always* returns a timezone-aware + UTC-localized :class:`Timestamp`, :class:`~snowflake.snowpark.modin.pandas.Series` or + :class:`DatetimeIndex`. To do this, timezone-naive inputs are + *localized* as UTC, while timezone-aware inputs are *converted* to UTC. + + - If :const:`False` (default), inputs will not be coerced to UTC. + Timezone-naive inputs will remain naive, while timezone-aware ones + will keep their time offsets. Limitations exist for mixed + offsets (typically, daylight savings), see :ref:`Examples + ` section for details. + + See also: pandas general documentation about `timezone conversion and + localization + `_. + + format : str, default None + The strftime to parse time, e.g. :const:`"%d/%m/%Y"`. Note that + :const:`"%f"` will parse all the way up to nanoseconds. See + `strftime documentation + `_ for more information on choices. + exact : bool, default True + Control how `format` is used: + + - If :const:`True`, require an exact `format` match. + - If :const:`False`, allow the `format` to match anywhere in the target + string. + + unit : str, default 'ns' + The unit of the arg (D,s,ms,us,ns) denote the unit, which is an + integer or float number. This will be based off the origin. + Example, with ``unit='ms'`` and ``origin='unix'``, this would calculate + the number of milliseconds to the unix epoch start. + infer_datetime_format : bool, default False + If :const:`True` and no `format` is given, attempt to infer the format + of the datetime strings based on the first non-NaN element, + and if it can be inferred, switch to a faster method of parsing them. + In some cases this can increase the parsing speed by ~5-10x. + origin : scalar, default 'unix' + Define the reference date. The numeric values would be parsed as number + of units (defined by `unit`) since this reference date. + + - If :const:`'unix'` (or POSIX) time; origin is set to 1970-01-01. + - If :const:`'julian'`, unit must be :const:`'D'`, and origin is set to + beginning of Julian Calendar. Julian day number :const:`0` is assigned + to the day starting at noon on January 1, 4713 BC. + - If Timestamp convertible, origin is set to Timestamp identified by + origin. + cache : bool, default True + cache parameter is ignored with Snowflake backend, i.e., no caching will be + applied + + Returns + ------- + datetime + If parsing succeeded. 
+ Return type depends on input (types in parenthesis correspond to + fallback in case of unsuccessful timezone or out-of-range timestamp + parsing): + + - scalar: :class:`Timestamp` (or :class:`datetime.datetime`) + - array-like: :class:`~snowflake.snowpark.modin.pandas.Series` with :class:`datetime64` dtype containing + :class:`datetime.datetime` (or + :class: :class:`~snowflake.snowpark.modin.pandas.Series` of :class:`object` dtype containing + :class:`datetime.datetime`) + - Series: :class:`~snowflake.snowpark.modin.pandas.Series` of :class:`datetime64` dtype (or + :class: :class:`~snowflake.snowpark.modin.pandas.Series` of :class:`object` dtype containing + :class:`datetime.datetime`) + - DataFrame: :class:`~snowflake.snowpark.modin.pandas.Series` of :class:`datetime64` dtype (or + :class:`~snowflake.snowpark.modin.pandas.Series` of :class:`object` dtype containing + :class:`datetime.datetime`) + + Raises + ------ + ParserError + When parsing a date from string fails. + ValueError + When another datetime conversion error happens. For example when one + of 'year', 'month', day' columns is missing in a :class:`~snowflake.snowpark.modin.pandas.DataFrame`, or + when a Timezone-aware :class:`datetime.datetime` is found in an array-like + of mixed time offsets, and ``utc=False``. + + See Also + -------- + DataFrame.astype : Cast argument to a specified dtype. + to_timedelta : Convert argument to timedelta. + convert_dtypes : Convert dtypes. + + Notes + ----- + + Many input types are supported, and lead to different output types: + + - **scalars** can be int, float, str, datetime object (from stdlib :mod:`datetime` + module or :mod:`numpy`). They are converted to :class:`Timestamp` when + possible, otherwise they are converted to :class:`datetime.datetime`. + None/NaN/null scalars are converted to :const:`NaT`. + + - **array-like** can contain int, float, str, datetime objects. They are + converted to :class:`DatetimeIndex` when possible, otherwise they are + converted to :class:`Index` with :class:`object` dtype, containing + :class:`datetime.datetime`. None/NaN/null entries are converted to + :const:`NaT` in both cases. + + - **Series** are converted to :class:`~snowflake.snowpark.modin.pandas.Series` with :class:`datetime64` + dtype when possible, otherwise they are converted to :class:`~snowflake.snowpark.modin.pandas.Series` with + :class:`object` dtype, containing :class:`datetime.datetime`. None/NaN/null + entries are converted to :const:`NaT` in both cases. + + - **DataFrame/dict-like** are converted to :class:`~snowflake.snowpark.modin.pandas.Series` with + :class:`datetime64` dtype. For each row a datetime is created from assembling + the various dataframe columns. Column keys can be common abbreviations + like [‘year’, ‘month’, ‘day’, ‘minute’, ‘second’, ‘ms’, ‘us’, ‘ns’]) or + plurals of the same. + + The following causes are responsible for :class:`datetime.datetime` objects + being returned (possibly inside an :class:`Index` or a :class:`~snowflake.snowpark.modin.pandas.Series` with + :class:`object` dtype) instead of a proper pandas designated type + (:class:`Timestamp` or :class:`~snowflake.snowpark.modin.pandas.Series` with :class:`datetime64` dtype): + + - when any input element is before :const:`Timestamp.min` or after + :const:`Timestamp.max`, see `timestamp limitations + `_. + + - when ``utc=False`` (default) and the input is an array-like or + :class:`~snowflake.snowpark.modin.pandas.Series` containing mixed naive/aware datetime, or aware with mixed + time offsets. 
Note that this happens in the (quite frequent) situation when + the timezone has a daylight savings policy. In that case you may wish to + use ``utc=True``. + + Examples + -------- + + **Handling various input formats** + + Assembling a datetime from multiple columns of a :class:`~snowflake.snowpark.modin.pandas.DataFrame`. The keys + can be common abbreviations like ['year', 'month', 'day', 'minute', 'second', + 'ms', 'us', 'ns']) or plurals of the same + + >>> df = pd.DataFrame({'year': [2015, 2016], + ... 'month': [2, 3], + ... 'day': [4, 5]}) + >>> pd.to_datetime(df) + 0 2015-02-04 + 1 2016-03-05 + dtype: datetime64[ns] + + Passing ``infer_datetime_format=True`` can often-times speedup a parsing + if it's not an ISO8601 format exactly, but in a regular format. + + >>> s = pd.Series(['3/11/2000', '3/12/2000', '3/13/2000'] * 1000) + >>> s.head() + 0 3/11/2000 + 1 3/12/2000 + 2 3/13/2000 + 3 3/11/2000 + 4 3/12/2000 + dtype: object + + Using a unix epoch time + + >>> pd.to_datetime(1490195805, unit='s') + Timestamp('2017-03-22 15:16:45') + >>> pd.to_datetime(1490195805433502912, unit='ns') + Timestamp('2017-03-22 15:16:45.433502912') + + .. warning:: For float arg, precision rounding might happen. To prevent + unexpected behavior use a fixed-width exact type. + + Using a non-unix epoch origin + + >>> pd.to_datetime([1, 2, 3], unit='D', + ... origin=pd.Timestamp('1960-01-01')) + 0 1960-01-02 + 1 1960-01-03 + 2 1960-01-04 + dtype: datetime64[ns] + + + **Non-convertible date/times** + + If a date does not meet the `timestamp limitations + `_, passing ``errors='ignore'`` + will return the original input instead of raising any exception. + + Passing ``errors='coerce'`` will force an out-of-bounds date to :const:`NaT`, + in addition to forcing non-dates (or non-parseable dates) to :const:`NaT`. + + >>> pd.to_datetime(['13000101', 'abc'], format='%Y%m%d', errors='ignore') # doctest: +SKIP + 0 13000101 + 1 abc + dtype: object + + >>> pd.to_datetime(['13000101', 'abc'], format='%Y%m%d', errors='coerce') + 0 NaT + 1 NaT + dtype: datetime64[ns] + + + .. _to_datetime_tz_examples: + + **Timezones and time offsets** + + The default behaviour (``utc=False``) is as follows: + + - Timezone-naive inputs are converted to timezone-naive :class:`~snowflake.snowpark.modin.pandas.Series`: + + >>> pd.to_datetime(['2018-10-26 12:00', '2018-10-26 13:00:15']) + 0 2018-10-26 12:00:00 + 1 2018-10-26 13:00:15 + dtype: datetime64[ns] + + - Timezone-aware inputs *with constant time offset* are still converted to + timezone-naive :class:`~snowflake.snowpark.modin.pandas.Series` by default. 
+
+ >>> pd.to_datetime(['2018-10-26 12:00:00 -0500', '2018-10-26 13:00:00 -0500'])
+ 0 2018-10-26 12:00:00
+ 1 2018-10-26 13:00:00
+ dtype: datetime64[ns]
+
+ - Use the right format to convert to a timezone-aware type (note that when calling the Snowpark
+ pandas API to_pandas(), the timezone-aware output will always be converted to the session timezone):
+
+ >>> pd.to_datetime(['2018-10-26 12:00:00 -0500', '2018-10-26 13:00:00 -0500'], format="%Y-%m-%d %H:%M:%S %z")
+ 0 2018-10-26 10:00:00-07:00
+ 1 2018-10-26 11:00:00-07:00
+ dtype: datetime64[ns, America/Los_Angeles]
+
+ - Timezone-aware inputs *with mixed time offsets* (for example
+ issued from a timezone with daylight savings, such as Europe/Paris):
+
+ >>> pd.to_datetime(['2020-10-25 02:00:00 +0200', '2020-10-25 04:00:00 +0100'])
+ 0 2020-10-25 02:00:00
+ 1 2020-10-25 04:00:00
+ dtype: datetime64[ns]
+
+ >>> pd.to_datetime(['2020-10-25 02:00:00 +0200', '2020-10-25 04:00:00 +0100'], format="%Y-%m-%d %H:%M:%S %z")
+ 0 2020-10-24 17:00:00-07:00
+ 1 2020-10-24 20:00:00-07:00
+ dtype: datetime64[ns, America/Los_Angeles]
+
+ Setting ``utc=True`` ensures that the output is always timezone-aware:
+
+ - Timezone-naive inputs are *localized* based on the session timezone
+
+ >>> pd.to_datetime(['2018-10-26 12:00', '2018-10-26 13:00'], utc=True)
+ 0 2018-10-26 12:00:00-07:00
+ 1 2018-10-26 13:00:00-07:00
+ dtype: datetime64[ns, America/Los_Angeles]
+
+ - Timezone-aware inputs are *converted* to session timezone
+
+ >>> pd.to_datetime(['2018-10-26 12:00:00 -0530', '2018-10-26 12:00:00 -0500'],
+ ... utc=True)
+ 0 2018-10-26 10:30:00-07:00
+ 1 2018-10-26 10:00:00-07:00
+ dtype: datetime64[ns, America/Los_Angeles]
+ """
+ # TODO: SNOW-1063345: Modin upgrade - modin.pandas functions in general.py
+ raise_if_native_pandas_objects(arg)
+
+ if arg is None:
+ return None # same as pandas
+ if unit and unit not in VALID_TO_DATETIME_UNIT:
+ raise ValueError(f"Unrecognized unit {unit}")
+
+ if not cache:
+ WarningMessage.ignored_argument(
+ operation="to_datetime",
+ argument="cache",
+ message="cache parameter is ignored with Snowflake backend, i.e., no caching will be applied",
+ )
+ arg_is_scalar = is_scalar(arg)
+ # handle empty array, list, dict
+ if not arg_is_scalar and not isinstance(arg, (DataFrame, Series)) and len(arg) == 0:
+ return arg if isinstance(arg, Series) else Series(arg) # always return a Series
+ if not isinstance(arg, (DataFrame, Series)):
+ # turn a dictionary-like arg into a DataFrame, and a list-like or scalar arg into a Series
+ if isinstance(arg, dict):
+ arg = DataFrame(arg) # pragma: no cover
+ else:
+ name = None
+ # keep index name
+ if isinstance(arg, pandas.Index):
+ name = arg.name
+ arg = Series(arg)
+ arg.name = name
+
+ series = arg._to_datetime(
+ errors=errors,
+ dayfirst=dayfirst,
+ yearfirst=yearfirst,
+ utc=utc,
+ format=format,
+ exact=exact,
+ unit=unit,
+ infer_datetime_format=infer_datetime_format,
+ origin=origin,
+ )
+ if arg_is_scalar:
+ # Calling squeeze directly on a Snowpark pandas Series makes an unnecessary
+ # count SQL call. To avoid that we convert the Snowpark pandas Series to a native
+ # pandas series first.
+ return series.to_pandas().squeeze()
+ return series
+
+
+@snowpark_pandas_telemetry_standalone_function_decorator
+def get_dummies(
+ data,
+ prefix=None,
+ prefix_sep="_",
+ dummy_na=False,
+ columns=None,
+ sparse=False,
+ drop_first=False,
+ dtype=None,
+): # noqa: PR01, RT01, D200
+ """
+ Convert categorical variable into dummy/indicator variables.
+ + Parameters + ---------- + data : array-like, Series, or :class:`~snowflake.snowpark.modin.pandas.DataFrame` + Data of which to get dummy indicators. + prefix : str, list of str, or dict of str, default None + String to append DataFrame column names. + Pass a list with length equal to the number of columns + when calling get_dummies on a DataFrame. Alternatively, `prefix` + can be a dictionary mapping column names to prefixes. + Only str, list of str and None is supported for this parameter. + prefix_sep : str, default '_' + If appending prefix, separator/delimiter to use. + dummy_na : bool, default False + Add a column to indicate NaNs, if False NaNs are ignored. Only the + value False is supported for this parameter. + columns : list-like, default None + Column names in the DataFrame to be encoded. + If `columns` is None then all the columns with + `string` dtype will be converted. + sparse : bool, default False + Whether the dummy-encoded columns should be backed by + a :class:`SparseArray` (True) or a regular NumPy array (False). + This parameter is ignored. + drop_first : bool, default False + Whether to get k-1 dummies out of k categorical levels by removing the + first level. Only the value False is supported for this parameter. + dtype : dtype, default np.uint8 + Data type for new columns. Only the value None is supported for this parameter. + + Returns + ------- + :class:`~snowflake.snowpark.modin.pandas.DataFrame` + Dummy-coded data. + + Examples + -------- + >>> s = pd.Series(list('abca')) + + >>> pd.get_dummies(s) + a b c + 0 1 0 0 + 1 0 1 0 + 2 0 0 1 + 3 1 0 0 + + >>> df = pd.DataFrame({'A': ['a', 'b', 'a'], 'B': ['b', 'a', 'c'], + ... 'C': [1, 2, 3]}) + + >>> pd.get_dummies(df, prefix=['col1', 'col2']) + C col1_a col1_b col2_a col2_b col2_c + 0 1 1 0 0 1 0 + 1 2 0 1 1 0 0 + 2 3 1 0 0 0 1 + + >>> pd.get_dummies(pd.Series(list('abcaa'))) + a b c + 0 1 0 0 + 1 0 1 0 + 2 0 0 1 + 3 1 0 0 + 4 1 0 0 + """ + # TODO: SNOW-1063345: Modin upgrade - modin.pandas functions in general.py + new_qc = data._query_compiler.get_dummies( + columns=columns, + prefix=prefix, + prefix_sep=prefix_sep, + dummy_na=dummy_na, + drop_first=drop_first, + dtype=dtype, + is_series=not data._is_dataframe, + ) + return DataFrame(query_compiler=new_qc) + + +@snowpark_pandas_telemetry_standalone_function_decorator +def melt( + frame, + id_vars=None, + value_vars=None, + var_name=None, + value_name="value", + col_level=None, + ignore_index: bool = True, +): # noqa: PR01, RT01, D200 + """ + Unpivot a DataFrame from wide to long format, optionally leaving identifiers set. + + Parameters + ---------- + id_vars : list of identifiers to retain in the result + value_vars : list of columns to unpivot on + defaults to all columns, excluding the id_vars columns + var_name : variable name, defaults to "variable" + value_name : value name, defaults to "value" + col_level : int, not implemented + ignore_index : bool, not implemented + + Returns + ------- + :class:`~snowflake.snowpark.modin.pandas.DataFrame` + unpivoted on the value columns + + Examples + -------- + >>> df = pd.DataFrame({'A': {0: 'a', 1: 'b', 2: 'c'}, + ... 'B': {0: 1, 1: 3, 2: 5}, + ... 'C': {0: 2, 1: 4, 2: 6}}) + + >>> pd.melt(df) + variable value + 0 A a + 1 A b + 2 A c + 3 B 1 + 4 B 3 + 5 B 5 + 6 C 2 + 7 C 4 + 8 C 6 + + >>> df = pd.DataFrame({'A': {0: 'a', 1: 'b', 2: 'c'}, + ... 'B': {0: 1, 1: 3, 2: 5}, + ... 
'C': {0: 2, 1: 4, 2: 6}}) + >>> pd.melt(df, id_vars=['A'], value_vars=['B'], var_name='myVarname', value_name='myValname') + A myVarname myValname + 0 a B 1 + 1 b B 3 + 2 c B 5 + """ + # TODO: SNOW-1063345: Modin upgrade - modin.pandas functions in general.py + return frame.melt( + id_vars=id_vars, + value_vars=value_vars, + var_name=var_name, + value_name=value_name, + col_level=col_level, + ignore_index=ignore_index, + ) + + +@snowpark_pandas_telemetry_standalone_function_decorator +@_inherit_docstrings(pandas.crosstab, apilink="pandas.crosstab") +def crosstab( + index, + columns, + values=None, + rownames=None, + colnames=None, + aggfunc=None, + margins=False, + margins_name: str = "All", + dropna: bool = True, + normalize=False, +) -> DataFrame: # noqa: PR01, RT01, D200 + """ + Compute a simple cross tabulation of two (or more) factors. + """ + # TODO: SNOW-1063345: Modin upgrade - modin.pandas functions in general.py + ErrorMessage.not_implemented() + + pandas_crosstab = pandas.crosstab( + index, + columns, + values, + rownames, + colnames, + aggfunc, + margins, + margins_name, + dropna, + normalize, + ) + return DataFrame(pandas_crosstab) + + +# Adding docstring since pandas docs don't have web section for this function. +@snowpark_pandas_telemetry_standalone_function_decorator +def lreshape(data: DataFrame, groups, dropna=True, label=None): + """ + Reshape wide-format data to long. Generalized inverse of ``DataFrame.pivot``. + + Accepts a dictionary, `groups`, in which each key is a new column name + and each value is a list of old column names that will be "melted" under + the new column name as part of the reshape. + + Parameters + ---------- + data : DataFrame + The wide-format DataFrame. + groups : dict + Dictionary in the form: `{new_name : list_of_columns}`. + dropna : bool, default: True + Whether include columns whose entries are all NaN or not. + label : optional + Deprecated parameter. + + Returns + ------- + DataFrame + Reshaped DataFrame. + """ + # TODO: SNOW-1063345: Modin upgrade - modin.pandas functions in general.py + ErrorMessage.not_implemented() + + if not isinstance(data, DataFrame): + raise ValueError(f"can not lreshape with instance of type {type(data)}") + return DataFrame( + pandas.lreshape(to_pandas(data), groups, dropna=dropna, label=label) + ) + + +@_inherit_docstrings(pandas.wide_to_long, apilink="pandas.wide_to_long") +@snowpark_pandas_telemetry_standalone_function_decorator +def wide_to_long( + df: DataFrame, stubnames, i, j, sep: str = "", suffix: str = r"\d+" +) -> DataFrame: # noqa: PR01, RT01, D200 + """ + Unpivot a DataFrame from wide to long format. + """ + # TODO: SNOW-1063345: Modin upgrade - modin.pandas functions in general.py + ErrorMessage.not_implemented() + + if not isinstance(df, DataFrame): + raise ValueError(f"can not wide_to_long with instance of type {type(df)}") + # ErrorMessage.default_to_pandas("`wide_to_long`") + return DataFrame( + pandas.wide_to_long(to_pandas(df), stubnames, i, j, sep=sep, suffix=suffix) + ) + + +def _determine_name(objs: Iterable[BaseQueryCompiler], axis: int | str): + """ + Determine names of index after concatenation along passed axis. + + Parameters + ---------- + objs : iterable of QueryCompilers + Objects to concatenate. + axis : int or str + The axis to concatenate along. + + Returns + ------- + list with single element + Computed index name, `None` if it could not be determined. 
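+
+ For example (an illustrative sketch assuming ``axis=0`` and single-level indexes):
+ if every input's index is named ``'id'``, this returns ``['id']``; if the index
+ names differ across the inputs, ``None`` is returned.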
+ """
+ # TODO: SNOW-1063345: Modin upgrade - modin.pandas functions in general.py
+ axis = pandas.DataFrame()._get_axis_number(axis)
+
+ def get_names(obj):
+ return obj.columns.names if axis else obj.index.names
+
+ names = np.array([get_names(obj) for obj in objs])
+
+ # saving old name, only if index names of all objs are the same
+ if np.all(names == names[0]):
+ # we must do this check to avoid calling `list(str_like_name)`
+ return list(names[0]) if is_list_like(names[0]) else [names[0]]
+ else:
+ return None
+
+
+@_inherit_docstrings(pandas.to_timedelta, apilink="pandas.to_timedelta")
+@snowpark_pandas_telemetry_standalone_function_decorator
+def to_timedelta(arg, unit=None, errors="raise"): # noqa: PR01, RT01, D200
+ """
+ Convert argument to timedelta.
+
+ Accepts str, timedelta, list-like or Series for arg parameter.
+ Returns a Series if and only if arg is provided as a Series.
+ """
+ # TODO: SNOW-1063345: Modin upgrade - modin.pandas functions in general.py
+ ErrorMessage.not_implemented()
+
+ if isinstance(arg, Series):
+ query_compiler = arg._query_compiler.to_timedelta(unit=unit, errors=errors)
+ return Series(query_compiler=query_compiler)
+ return pandas.to_timedelta(arg, unit=unit, errors=errors)
+
+
+@snowpark_pandas_telemetry_standalone_function_decorator
+def date_range(
+ start: VALID_DATE_TYPE | None = None,
+ end: VALID_DATE_TYPE | None = None,
+ periods: int | None = None,
+ freq: str | pd.DateOffset | None = None,
+ tz: str | tzinfo | None = None,
+ normalize: bool = False,
+ name: Hashable | None = None,
+ inclusive: IntervalClosedType = "both",
+ **kwargs,
+) -> Series:
+ """
+ Return a fixed frequency Series.
+
+ Returns the range of equally spaced time points (where the difference between any
+ two adjacent points is specified by the given frequency) such that they all
+ satisfy `start <[=] x <[=] end`, where the first one and the last one are, resp.,
+ the first and last time points in that range that fall on the boundary of ``freq``
+ (if given as a frequency string) or that are valid for ``freq`` (if given as a
+ :class:`pandas.tseries.offsets.DateOffset`). (If exactly one of ``start``,
+ ``end``, or ``freq`` is *not* specified, this missing parameter can be computed
+ given ``periods``, the number of timesteps in the range. See the note below.)
+
+ Parameters
+ ----------
+ start : str or datetime-like, optional
+ Left bound for generating dates.
+ end : str or datetime-like, optional
+ Right bound for generating dates.
+ periods : int, optional
+ Number of periods to generate.
+ freq : str or DateOffset, default 'D'
+ Frequency strings can have multiples, e.g. '5H'.
+ tz : str or tzinfo, optional
+ Time zone name for returning localized DatetimeIndex, for example
+ 'Asia/Hong_Kong'. By default, the resulting DatetimeIndex is
+ timezone-naive.
+ normalize : bool, default False
+ Normalize start/end dates to midnight before generating date range.
+ name : str, default None
+ Name of the resulting DatetimeIndex.
+ inclusive : {"both", "neither", "left", "right"}, default "both"
+ Include boundaries; whether to set each bound as closed or open.
+
+ .. versionadded:: 1.4.0
+ **kwargs
+ For compatibility. Has no effect on the result.
+
+ Returns
+ -------
+ rng : Series
+
+ See Also
+ --------
+ DatetimeIndex : An immutable container for datetimes.
+ timedelta_range : Return a fixed frequency TimedeltaIndex.
+ period_range : Return a fixed frequency PeriodIndex.
+ interval_range : Return a fixed frequency IntervalIndex.
+ + Notes + ----- + ``tz`` is not supported. + + Of the four parameters ``start``, ``end``, ``periods``, and ``freq``, + exactly three must be specified. If ``freq`` is omitted, the resulting + ``DatetimeIndex`` will have ``periods`` linearly spaced elements between + ``start`` and ``end`` (closed on both sides). + + To learn more about the frequency strings, please see `this link + `__. + + Also, custom or business frequencies are not implemented in Snowpark pandas, e.g., "B", "C", "SMS", "BMS", "CBMS", + "BQS", "BYS", "bh", "cbh". + + Examples + -------- + **Specifying the values** + + The next four examples generate the same `DatetimeIndex`, but vary + the combination of `start`, `end` and `periods`. + + Specify `start` and `end`, with the default daily frequency. + + >>> pd.date_range(start='1/1/2018', end='1/08/2018') + 0 2018-01-01 + 1 2018-01-02 + 2 2018-01-03 + 3 2018-01-04 + 4 2018-01-05 + 5 2018-01-06 + 6 2018-01-07 + 7 2018-01-08 + dtype: datetime64[ns] + + Specify `start` and `periods`, the number of periods (days). + + >>> pd.date_range(start='1/1/2018', periods=8) + 0 2018-01-01 + 1 2018-01-02 + 2 2018-01-03 + 3 2018-01-04 + 4 2018-01-05 + 5 2018-01-06 + 6 2018-01-07 + 7 2018-01-08 + dtype: datetime64[ns] + + Specify `end` and `periods`, the number of periods (days). + + >>> pd.date_range(end='1/1/2018', periods=8) + 0 2017-12-25 + 1 2017-12-26 + 2 2017-12-27 + 3 2017-12-28 + 4 2017-12-29 + 5 2017-12-30 + 6 2017-12-31 + 7 2018-01-01 + dtype: datetime64[ns] + + Specify `start`, `end`, and `periods`; the frequency is generated + automatically (linearly spaced). + + >>> pd.date_range(start='2018-04-24', end='2018-04-27', periods=3) + 0 2018-04-24 00:00:00 + 1 2018-04-25 12:00:00 + 2 2018-04-27 00:00:00 + dtype: datetime64[ns] + + **Other Parameters** + + Changed the `freq` (frequency) to ``'ME'`` (month end frequency). + + >>> pd.date_range(start='1/1/2018', periods=5, freq='ME') + 0 2018-01-31 + 1 2018-02-28 + 2 2018-03-31 + 3 2018-04-30 + 4 2018-05-31 + dtype: datetime64[ns] + + Multiples are allowed + + >>> pd.date_range(start='1/1/2018', periods=5, freq='3ME') + 0 2018-01-31 + 1 2018-04-30 + 2 2018-07-31 + 3 2018-10-31 + 4 2019-01-31 + dtype: datetime64[ns] + + `freq` can also be specified as an Offset object. + + >>> pd.date_range(start='1/1/2018', periods=5, freq=pd.offsets.MonthEnd(3)) + 0 2018-01-31 + 1 2018-04-30 + 2 2018-07-31 + 3 2018-10-31 + 4 2019-01-31 + dtype: datetime64[ns] + + Specify `tz` to set the timezone. + + >>> pd.date_range(start='1/1/2018', periods=5, tz='Asia/Tokyo') # doctest: +SKIP + DatetimeIndex(['2018-01-01 00:00:00+09:00', '2018-01-02 00:00:00+09:00', + '2018-01-03 00:00:00+09:00', '2018-01-04 00:00:00+09:00', + '2018-01-05 00:00:00+09:00'], + dtype='datetime64[ns, Asia/Tokyo]', freq='D') + + `inclusive` controls whether to include `start` and `end` that are on the + boundary. The default, "both", includes boundary points on either end. + + >>> pd.date_range(start='2017-01-01', end='2017-01-04', inclusive="both") + 0 2017-01-01 + 1 2017-01-02 + 2 2017-01-03 + 3 2017-01-04 + dtype: datetime64[ns] + + Use ``inclusive='left'`` to exclude `end` if it falls on the boundary. + + >>> pd.date_range(start='2017-01-01', end='2017-01-04', inclusive='left') + 0 2017-01-01 + 1 2017-01-02 + 2 2017-01-03 + dtype: datetime64[ns] + + Use ``inclusive='right'`` to exclude `start` if it falls on the boundary, and + similarly ``inclusive='neither'`` will exclude both `start` and `end`. 
+ + >>> pd.date_range(start='2017-01-01', end='2017-01-04', inclusive='right') + 0 2017-01-02 + 1 2017-01-03 + 2 2017-01-04 + dtype: datetime64[ns] + """ + # TODO: SNOW-1063345: Modin upgrade - modin.pandas functions in general.py + + if freq is None and common.any_none(periods, start, end): + freq = "D" + + if common.count_not_none(start, end, periods, freq) != 3: + raise ValueError( + "Of the four parameters: start, end, periods, and freq, exactly three must be specified" + ) + + # Validation code is mostly copied from pandas code DatetimeArray._generate_range and it will cast it to an integer + periods = datetimelike.validate_periods(periods) + + # Return DateOffset object from string or datetime.timedelta object + freq = to_offset(freq) + + if freq is None and periods < 0: + raise ValueError("Number of samples, %s, must be non-negative." % periods) + + if start is not None: + start = Timestamp(start) + + if end is not None: + end = Timestamp(end) + + if start is NaT or end is NaT: + raise ValueError("Neither `start` nor `end` can be NaT") + + # Check that the `inclusive` argument is among {"both", "neither", "left", "right"} + left_inclusive, right_inclusive = validate_inclusive(inclusive) + + # If normalize is needed, set start and end time to midnight + start, end = _maybe_normalize_endpoints(start, end, normalize) + + # If a timezone is not explicitly given via `tz`, see if one can be inferred from the `start` and `end` endpoints. + # If more than one of these inputs provides a timezone, require that they all agree. + tz = _infer_tz_from_endpoints(start, end, tz) + + qc = SnowflakeQueryCompiler.from_date_range( + start=start, + end=end, + periods=periods, + freq=freq, + tz=tz, + left_inclusive=left_inclusive, + right_inclusive=right_inclusive, + ) + s = Series(query_compiler=qc) + s.name = name + return s + + +@_inherit_docstrings(pandas.qcut, apilink="pandas.qcut") +@snowpark_pandas_telemetry_standalone_function_decorator +def qcut( + x: np.ndarray | Series, + q: int | ListLikeOfFloats, + labels: ListLike | bool | None = None, + retbins: bool = False, + precision: int = 3, + duplicates: Literal["raise"] | Literal["drop"] = "raise", +): # noqa: PR01, RT01, D200 + """ + Quantile-based discretization function. Inherits docstrings from Pandas. + retbins=True is not supported in Snowpark pandas. + + labels=False will run binning computation in Snowflake, whereas if labels is an array + the data will be fetched to the client and the binning run client-side, as Snowpark pandas API does + not yet support pd.Categorical in its ORM mapper. + """ + + kwargs = { + "labels": labels, + "retbins": retbins, + "precision": precision, + "duplicates": duplicates, + } + + # For numpy or list, call to native pandas. + if not isinstance(x, Series): + return pandas.qcut(x, q, **kwargs) + + # Check that labels is supported as in pandas. + if not (labels is None or labels is False or is_list_like(labels)): + raise ValueError( + "Bin labels must either be False, None or passed in as a list-like argument" + ) + + # Carry out check that for the list-like case quantiles are (monotonically) increasing, + # if not the case throw pandas compatible error. + if not isinstance(q, int) and np.all(np.diff(q) < 0): + # Note: Pandas 2.x changed the error message here, using Pandas 2.x behavior here. 
+        raise ValueError("left side of interval must be <= right side")
+
+    # remove duplicates (input like [0.5, 0.5] is ok)
+    q = sorted(list(set(q)))
+
+    if labels is not False:
+        # Labels require Categorical, which is not yet supported. Use native pandas conversion here to compute the result.
+        return pandas.qcut(x.to_pandas(), q, **kwargs)
+
+    ans = x._qcut(q, retbins, duplicates)
+
+    # Within Snowpark pandas, we avoid issuing a count query. However, if q != 1 and x is a Series/list-like
+    # containing a single element, native pandas raises:
+    #   ValueError: Bin edges must be unique: array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]).
+    #   You can drop duplicate edges by setting the 'duplicates' kwarg.
+    # Since qcut is an API that requires conversion anyway, we can mimic this behavior here.
+    ret = ans.to_pandas().to_numpy()
+
+    if len(ret) == 1 and isinstance(q, int) and q != 1:
+        if duplicates == "raise":
+            # Raise the pandas-compatible error.
+            raise ValueError(
+                f"Bin edges must be unique: {repr(np.array([0.] * q))}.\nYou can drop duplicate edges by setting the 'duplicates' kwarg."
+            )
+        else:
+            # The result will always be NaN because no unique bin could be found.
+            return np.array([np.nan])
+
+    return ret
+
+
+@snowpark_pandas_telemetry_standalone_function_decorator
+def cut(
+    x: AnyArrayLike,
+    bins: int | Sequence[Scalar] | IntervalIndex,
+    right: bool = True,
+    labels=None,
+    retbins: bool = False,
+    precision: int = 3,
+    include_lowest: bool = False,
+    duplicates: str = "raise",
+    ordered: bool = True,
+):
+    """
+    Bin values into discrete intervals.
+
+    Use `cut` when you need to segment and sort data values into bins. This
+    function is also useful for going from a continuous variable to a
+    categorical variable. For example, `cut` could convert ages to groups of
+    age ranges. Supports binning into an equal number of bins, or a
+    pre-specified array of bins.
+
+    Parameters
+    ----------
+    x : array-like
+        The input array to be binned. Must be 1-dimensional.
+    bins : int, sequence of scalars
+        The criteria to bin by.
+
+        * int : Defines the number of equal-width bins in the range of `x`. The
+          range of `x` is extended by .1% on each side to include the minimum
+          and maximum values of `x`.
+        * sequence of scalars : Defines the bin edges allowing for non-uniform
+          width. No extension of the range of `x` is done.
+
+    right : bool, default True
+        Indicates whether `bins` includes the rightmost edge or not. If
+        ``right == True`` (the default), then the `bins` ``[1, 2, 3, 4]``
+        indicate (1,2], (2,3], (3,4]. This argument is ignored when
+        `bins` is an IntervalIndex.
+    labels : array or False, default None
+        Specifies the labels for the returned bins. Must be the same length as
+        the resulting bins. If False, returns only integer indicators of the
+        bins. This affects the type of the output container (see below).
+        This argument is ignored when `bins` is an IntervalIndex. If True,
+        raises an error. When `ordered=False`, labels must be provided.
+
+        Snowpark pandas API does not support labels=None.
+        Labels must be of a Snowpark pandas API supported dtype.
+
+    retbins : bool, default False
+        Snowpark pandas API does not support this parameter yet.
+    precision : int, default 3
+        The precision at which to store and display the bin labels.
+    include_lowest : bool, default False
+        Whether the first interval should be left-inclusive or not.
+    duplicates : {'raise', 'drop'}, default 'raise'
+        If bin edges are not unique, raise ValueError or drop non-uniques.
+ ordered : bool, default True + Whether the labels are ordered or not. Applies to returned types + Categorical and Series (with Categorical dtype). If True, + the resulting categorical will be ordered. If False, the resulting + categorical will be unordered (labels must be provided). + + Returns + ------- + out : Categorical, Series, or ndarray + An array-like object representing the respective bin for each value + of `x`. The type depends on the value of `labels`. + + * None (default) : returns a Series for Series `x` or a + Categorical for all other inputs. The values stored within + are Interval dtype. + + * sequence of scalars : returns a Series for Series `x` or a + Categorical for all other inputs. The values stored within + are whatever the type in the sequence is. + + * False : returns an ndarray of integers. + + bins : numpy.ndarray + The computed or specified bins. Only returned when `retbins=True`. + For scalar or sequence `bins`, this is an ndarray with the computed + bins. If set `duplicates=drop`, `bins` will drop non-unique bin. + + Notes + ----- + Any NA values will be NA in the result. Out of bounds values will be NA in + the resulting Series or Categorical object. + + Snowpark pandas API does not natively support Categorical and categorical types. When calling `cut` with a + Snowpark pandas Series and using `labels=False`, a Snowpark pandas Series object is returned. However, + for `labels != False` an error is raised. + + Examples + -------- + Discretize into three equal-sized bins. + + >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), 3, labels=False) + ... # doctest: +ELLIPSIS + array([0, 2, 1, 1, 2, 0]) + + ``labels=False`` implies you just want the bins back. + + >>> pd.cut([0, 1, 1, 2], bins=4, labels=False) + array([0, 1, 1, 3]) + + Passing a Series as an input returns a Series with labels=False: + + >>> s = pd.Series(np.array([2, 4, 6, 8, 10]), + ... index=['a', 'b', 'c', 'd', 'e']) + >>> pd.cut(s, 3, labels=False) + ... # doctest: +ELLIPSIS + a 0 + b 0 + c 1 + d 2 + e 2 + dtype: int64 + """ + + if retbins is True: + ErrorMessage.not_implemented("retbins not supported.") + + # Execute other supported objects via native pandas. + if not isinstance(x, Series): + return pandas.cut( + x, + bins, + right=right, + labels=labels, + retbins=retbins, + precision=precision, + include_lowest=include_lowest, + duplicates=duplicates, + ordered=ordered, + ) + + # Produce pandas-compatible error if ordered=False and labels are not specified. + # No error is raised when labels are not desired (labels=False). + if ordered is False and labels is None: + raise ValueError("'labels' must be provided if 'ordered = False'") + + bins, qc = x._query_compiler.cut( + bins, + right=right, + labels=labels, + precision=precision, + include_lowest=include_lowest, + duplicates=duplicates, + ) + + # Depending on setting, reconstruct bins and convert qc to the correct result. + if labels is False: + return pd.Series(query_compiler=qc) + else: + # Raise NotImplemented Error as categorical is not supported. + ErrorMessage.not_implemented("categorical not supported in Snowpark pandas API") + + # Following code would produce correct result, uncomment once categorical is supported. + # Convert to pandas categorical and return as Series. + # Note: In the future, once we support CategoricalType we could keep this lazily around. For now, + # match what pandas does here. In the future, change pandas -> pd and everything should work out-of-the box. 
+ # arr = qc.to_numpy().ravel() + # return pandas.Series( + # pandas.Categorical(values=arr, categories=labels, ordered=ordered) + # ) diff --git a/src/snowflake/snowpark/modin/pandas/groupby.py b/src/snowflake/snowpark/modin/pandas/groupby.py new file mode 100644 index 00000000000..8e44354ac4a --- /dev/null +++ b/src/snowflake/snowpark/modin/pandas/groupby.py @@ -0,0 +1,1209 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +# Code in this file may constitute partial or total reimplementation, or modification of +# existing code originally distributed by the Modin project, under the Apache License, +# Version 2.0. + +"""Implement GroupBy public API as pandas does.""" + +from collections.abc import Hashable +from typing import Any, Callable, Literal, Optional, Union + +import numpy as np # noqa: F401 +import numpy.typing as npt +import pandas +import pandas.core.groupby +from pandas._libs.lib import NoDefault, no_default +from pandas._typing import AggFuncType, Axis, IndexLabel +from pandas.core.dtypes.common import is_dict_like, is_list_like, is_numeric_dtype +from pandas.errors import SpecificationError +from pandas.io.formats.printing import PrettyDict +from pandas.util._validators import validate_bool_kwarg + +# following import are used in doctest +from snowflake.snowpark.modin import pandas as pd # noqa: F401 + +# Snowpark pandas API version +from snowflake.snowpark.modin.pandas.series import Series +from snowflake.snowpark.modin.pandas.utils import ( + raise_if_native_pandas_objects, + validate_and_try_convert_agg_func_arg_func_to_str, +) +from snowflake.snowpark.modin.plugin._internal.apply_utils import ( + create_groupby_transform_func, +) +from snowflake.snowpark.modin.plugin._internal.telemetry import TelemetryMeta +from snowflake.snowpark.modin.plugin.compiler.snowflake_query_compiler import ( + SnowflakeQueryCompiler, +) +from snowflake.snowpark.modin.plugin.utils.error_message import ErrorMessage +from snowflake.snowpark.modin.plugin.utils.warning_message import WarningMessage +from snowflake.snowpark.modin.utils import ( + MODIN_UNNAMED_SERIES_LABEL, + _inherit_docstrings, + doc_replace_dataframe_with_link, + hashable, + validate_int_kwarg, +) + +_DEFAULT_BEHAVIOUR = { + "__class__", + "__getitem__", + "__init__", + "__iter__", + "_as_index", + "_axis", + "_by", + "_check_index_name", + "_columns", + "_df", + "_groups_cache", + "_idx_name", + "_index", + "_indices_cache", + "_internal_by", + "_internal_by_cache", + "_iter", + "_kwargs", + "_level", + "_pandas_class", + "_query_compiler", + "_sort", + "_wrap_aggregation", +} + + +@_inherit_docstrings( + pandas.core.groupby.DataFrameGroupBy, modify_doc=doc_replace_dataframe_with_link +) +class 
DataFrameGroupBy(metaclass=TelemetryMeta): + _pandas_class = pandas.core.groupby.DataFrameGroupBy + + def __init__( + self, + df, + by, + axis, + level, + as_index, + sort, + group_keys, + idx_name, + **kwargs, + ) -> None: + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + self._axis = axis + self._idx_name = idx_name + self._df = df + self._df._query_compiler.validate_groupby(by, axis, level) + self._query_compiler = self._df._query_compiler + self._columns = self._query_compiler.columns + self._by = by + self._level = level + self._kwargs = { + "level": level, + "sort": sort, + "as_index": as_index, + "group_keys": group_keys, + } + self._kwargs.update(kwargs) + + def __getattr__(self, key): + """ + Alter regular attribute access, looks up the name in the columns. + + Parameters + ---------- + key : str + Attribute name. + + Returns + ------- + The value of the attribute. + """ + try: + return object.__getattribute__(self, key) + except AttributeError as err: + if key in self._columns: + return self.__getitem__(key) + raise err + + @property + def ngroups(self): + return self._query_compiler.groupby_ngroups( + by=self._by, + axis=self._axis, + groupby_kwargs=self._kwargs, + ) + + def skew(self, *args, **kwargs): + ErrorMessage.method_not_implemented_error(name="skew", class_="GroupBy") + + def ffill(self, limit=None): + ErrorMessage.method_not_implemented_error(name="ffill", class_="GroupBy") + + def sem(self, ddof=1): + ErrorMessage.method_not_implemented_error(name="sem", class_="GroupBy") + + def value_counts( + self, + subset=None, + normalize: bool = False, + sort: bool = True, + ascending: bool = False, + dropna: bool = True, + ): + ErrorMessage.method_not_implemented_error(name="value_counts", class_="GroupBy") + + def mean( + self, + numeric_only: bool = False, + engine: Optional[Literal["cython", "numba"]] = None, + engine_kwargs: Optional[dict[str, bool]] = None, + ): + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + WarningMessage.warning_if_engine_args_is_set( + "groupby_mean", engine, engine_kwargs + ) + return self._wrap_aggregation( + qc_method=type(self._query_compiler).groupby_agg, + numeric_only=numeric_only, + agg_func="mean", + agg_kwargs=dict(numeric_only=numeric_only), + ) + + def any(self, skipna=True): + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + ErrorMessage.method_not_implemented_error(name="any", class_="GroupBy") + + @property + def plot(self): # pragma: no cover + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + ErrorMessage.method_not_implemented_error(name="plot", class_="GroupBy") + + def ohlc(self): + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + ErrorMessage.method_not_implemented_error(name="ohlc", class_="GroupBy") + + def __bytes__(self): + """ + Convert DataFrameGroupBy object into a python2-style byte string. + + Returns + ------- + bytearray + Byte array representation of `self`. + + Notes + ----- + Deprecated and removed in pandas and will be likely removed in Modin. 
+ """ + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + ErrorMessage.method_not_implemented_error(name="__bytes__", class_="GroupBy") + + _groups_cache = no_default + + # TODO: since python 3.9: + # @cached_property + @property + def groups(self) -> PrettyDict[Hashable, pd.Index]: + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + return self._query_compiler.groupby_groups( + self._by, + self._axis, + groupby_kwargs={ + # groupby.groups always treat as_index as True. this seems to be + # intended behavior: https://github.com/pandas-dev/pandas/issues/56965 + k: True if k == "as_index" else v + for k, v in self._kwargs.items() + }, + ) + + def min( + self, + numeric_only: bool = False, + min_count: int = -1, + engine: Optional[Literal["cython", "numba"]] = None, + engine_kwargs: Optional[dict[str, bool]] = None, + ): + WarningMessage.warning_if_engine_args_is_set( + "groupby_min", engine, engine_kwargs + ) + validate_int_kwarg(min_count, "min_count", float_allowed=False) + return self._wrap_aggregation( + qc_method=type(self._query_compiler).groupby_agg, + numeric_only=numeric_only, + agg_func="min", + agg_kwargs=dict(min_count=min_count, numeric_only=numeric_only), + ) + + def idxmax( + self, axis: Axis = no_default, skipna: bool = True, numeric_only: bool = False + ): + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + axis_number = self._df._get_axis_number(axis) + if axis_number == 1: + # Performing idxmax is deprecated and will be removed in a future pandas version. + raise NotImplementedError( + "DataFrameGroupBy.idxmax with axis=1 is deprecated and will be removed in a " + "future version. Operate on the un-grouped DataFrame instead." + ) + else: + # When performing idxmax/idxmin on axis=0, it can be done column-wise. + result = self._wrap_aggregation( + qc_method=type(self._query_compiler).groupby_agg, + numeric_only=numeric_only, + how="axis_wise", + agg_func="idxmax", + # axis is also specified here since the axis used with idxmax/idxmin is different from + # the groupby axis. + agg_kwargs=dict(skipna=skipna, axis=0), + ) + return result + + def idxmin( + self, axis: Axis = no_default, skipna: bool = True, numeric_only: bool = False + ) -> Series: + axis_number = self._df._get_axis_number(axis) + if axis_number == 1: + # Performing idxmin is deprecated and will be removed in a future pandas version. + raise NotImplementedError( + "DataFrameGroupBy.idxmin with axis=1 is deprecated and will be removed in a " + "future version. Operate on the un-grouped DataFrame instead." + ) + else: + # When performing idxmax/idxmin on axis=0, it can be done column-wise. + result = self._wrap_aggregation( + qc_method=type(self._query_compiler).groupby_agg, + numeric_only=numeric_only, + how="axis_wise", + agg_func="idxmin", + # axis is also specified here since the axis used with idxmax/idxmin is different from + # the groupby axis. + agg_kwargs=dict(skipna=skipna, axis=0), + ) + return result + + @property + def ndim(self): + """ + Return 2. + + Returns + ------- + int + Returns 2. + + Notes + ----- + Deprecated and removed in pandas and will be likely removed in Modin. 
+ """ + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + return 2 # ndim is always 2 for DataFrames + + def shift( + self, periods: int = 1, freq: int = None, axis: Axis = 0, fill_value: Any = None + ): + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + if not isinstance(periods, int): + raise TypeError( + f"Periods must be integer, but {periods} is {type(periods)}." + ) + qc = self._query_compiler.groupby_shift( + self._by, self._axis, self._level, periods, freq, fill_value, self.ndim == 1 + ) + return ( + pd.Series(query_compiler=qc) + if self.ndim == 1 + else pd.DataFrame(query_compiler=qc) + ) + + def nth(self, n, dropna=None): + ErrorMessage.method_not_implemented_error(name="nth", class_="GroupBy") + + def cumsum(self, axis: Axis = 0, *args, **kwargs): + qc = self._query_compiler.groupby_cumsum(self._by, self._axis, self._kwargs) + return ( + pd.Series(query_compiler=qc) + if self.ndim == 1 + else pd.DataFrame(query_compiler=qc) + ) + + @property + def indices(self) -> dict[Hashable, npt.NDArray[np.intp]]: + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + return self._query_compiler.groupby_indices( + self._by, + self._axis, + groupby_kwargs={ + # groupby.indices always treat as_index as True. this seems to be + # intended behavior: https://github.com/pandas-dev/pandas/issues/56965 + k: True if k == "as_index" else v + for k, v in self._kwargs.items() + }, + ) + + @_inherit_docstrings(pandas.core.groupby.DataFrameGroupBy.pct_change) + def pct_change(self, *args, **kwargs): + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + ErrorMessage.method_not_implemented_error(name="pct_change", class_="GroupBy") + + def filter(self, func, dropna=True, *args, **kwargs): + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + ErrorMessage.method_not_implemented_error(name="filter", class_="GroupBy") + + def cummax(self, axis: Axis = 0, numeric_only: bool = False, *args, **kwargs): + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + qc = self._query_compiler.groupby_cummax( + self._by, self._axis, numeric_only, self._kwargs + ) + return ( + pd.Series(query_compiler=qc) + if self.ndim == 1 + else pd.DataFrame(query_compiler=qc) + ) + + def apply(self, func, *args, **kwargs): + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + # TODO: SNOW-1244717: Explore whether window function are performant and can be used + # whenever `func` is an aggregation function. 
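+        # Sketch of the intended behavior with illustrative (not shipped) data:
+        #     df = pd.DataFrame({"k": [1, 1, 2], "v": [10, 20, 30]})
+        #     df.groupby("k").apply(lambda g: g["v"].sum())        # squeezed into a Series
+        #     df.groupby("k").apply(lambda g: g.assign(w=g["v"]))  # remains a DataFrame
+        # The squeeze below happens when the result carries the single unnamed column label.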
+ if not callable(func): + raise NotImplementedError("No support for non-callable `func`") + dataframe_result = pd.DataFrame( + query_compiler=self._query_compiler.groupby_apply( + self._by, + agg_func=func, + axis=self._axis, + groupby_kwargs=self._kwargs, + agg_args=args, + agg_kwargs=kwargs, + ) + ) + if dataframe_result.columns.equals(pandas.Index([MODIN_UNNAMED_SERIES_LABEL])): + return dataframe_result.squeeze(axis=1) + return dataframe_result + + @property + def dtypes(self): + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + ErrorMessage.method_not_implemented_error(name="dtypes", class_="GroupBy") + + def first(self, **kwargs): + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + ErrorMessage.method_not_implemented_error(name="first", class_="GroupBy") + + _internal_by_cache = no_default + + # TODO: since python 3.9: + # @cached_property + @property + def _internal_by(self): + """ + Get only those components of 'by' that are column labels of the source frame. + + Returns + ------- + tuple of labels + """ + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + if self._internal_by_cache is not no_default: + return self._internal_by_cache + + by_list = self._by if is_list_like(self._by) else [self._by] + + internal_by = tuple( + by for by in by_list if hashable(by) and by in self._columns + ) + + self._internal_by_cache = internal_by + return internal_by + + def __getitem__(self, key): + """ + Implement indexing operation on a DataFrameGroupBy object. + + Parameters + ---------- + key : list or str + Names of columns to use as subset of original object. + + Returns + ------- + DataFrameGroupBy or SeriesGroupBy + Result of indexing operation. + + Raises + ------ + NotImplementedError + Column lookups on GroupBy with arbitrary Series in by is not yet supported. + """ + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + if self._axis == 1: + raise ValueError("Cannot subset columns when using axis=1") + + # These parameters are common for building the resulted Series or DataFrame groupby object + kwargs = { + **self._kwargs.copy(), + "by": self._by, + "axis": self._axis, + "idx_name": self._idx_name, + } + # The rules of type deduction for the resulted object is the following: + # 1. If `key` is a list-like or `as_index is False`, then the resulted object is a DataFrameGroupBy + # 2. Otherwise, the resulted object is SeriesGroupBy + # 3. 
Result type does not depend on the `by` origin + # Examples: + # - drop: any, as_index: any, __getitem__(key: list_like) -> DataFrameGroupBy + # - drop: any, as_index: False, __getitem__(key: any) -> DataFrameGroupBy + # - drop: any, as_index: True, __getitem__(key: label) -> SeriesGroupBy + if is_list_like(key): + make_dataframe = True + else: + if self._as_index: + make_dataframe = False + else: + make_dataframe = True + key = [key] + + column_index = self._df.columns + # validate that all keys are labels belong to the data column of the df + for label in key: + if not (label in column_index): + raise KeyError(f"Columns not found: '{label}'") + + # internal_by records all label in by that belongs to the data columns + internal_by = frozenset(self._internal_by) + if len(internal_by.intersection(key)) != 0: + message = ( + "Data column selection with overlap of 'by' columns is not yet supported, " + "please duplicate the overlapped by columns and rename it to a different name" + ) + ErrorMessage.not_implemented(message=message) + + # select the union of the internal bys and select keys. Here we find all integer + # positions for all the selected columns, and then call iloc to select all columns. + # This is because loc currently doesn't support select with multiindex, once iloc and + # dataframe getitem is supported, this can be replaced with df[list(internal_by) + list(key)] + # TODO (SNOW-896342): update self._df.iloc[:, ilocs_list] to use df[list(internal_by) + list(key)] + # once dataframe getitem is supported. + _, by_ilocs = column_index._get_indexer_strict(list(internal_by), "columns") + _, key_ilocs = column_index._get_indexer_strict(list(key), "columns") + ilocs_list = list(by_ilocs) + list(key_ilocs) + + if len(key_ilocs) > 1: + make_dataframe = True + + if make_dataframe: + return DataFrameGroupBy( + self._df.iloc[:, ilocs_list], + **kwargs, + ) + else: + return SeriesGroupBy( + self._df.iloc[:, ilocs_list], + **kwargs, + ) + + def cummin(self, axis: Axis = 0, numeric_only: bool = False, *args, **kwargs): + qc = self._query_compiler.groupby_cummin( + self._by, self._axis, numeric_only, self._kwargs + ) + return ( + pd.Series(query_compiler=qc) + if self.ndim == 1 + else pd.DataFrame(query_compiler=qc) + ) + + def bfill(self, limit=None): + ErrorMessage.method_not_implemented_error(name="bfill", class_="GroupBy") + + def prod(self, numeric_only=False, min_count=0): + ErrorMessage.method_not_implemented_error(name="prod", class_="GroupBy") + + def std( + self, + ddof: int = 1, + engine: Optional[Literal["cython", "numba"]] = None, + engine_kwargs: Optional[dict[str, bool]] = None, + numeric_only: bool = False, + ): + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + WarningMessage.warning_if_engine_args_is_set( + "groupby_std", engine, engine_kwargs + ) + return self._wrap_aggregation( + qc_method=SnowflakeQueryCompiler.groupby_agg, + numeric_only=numeric_only, + agg_func="std", + agg_kwargs=dict(ddof=ddof, numeric_only=numeric_only), + ) + + def aggregate( + self, + func: Optional[AggFuncType] = None, + *args: Any, + engine: Optional[Literal["cython", "numba"]] = None, + engine_kwargs: Optional[dict[str, bool]] = None, + **kwargs: Any, + ): + WarningMessage.warning_if_engine_args_is_set( + "groupby_aggregate", engine, engine_kwargs + ) + if self._axis != 0 and (is_dict_like(func) or is_list_like(func)): + # This is the same as pandas for func that is a list or dict + ErrorMessage.not_implemented( + "axis other than 0 is not supported" + ) # 
pragma: no cover + + func = validate_and_try_convert_agg_func_arg_func_to_str( + agg_func=func, obj=self, allow_duplication=True, axis=self._axis + ) + + if isinstance(func, str): + # Using "getattr" here masks possible AttributeError which we throw + # in __getattr__, so we should call __getattr__ directly instead. + agg_func = self.__getattr__(func) + if callable(agg_func): + return agg_func(*args, **kwargs) + + # when the aggregation function passed in is list like always return a Dataframe regardless + # it is SeriesGroupBy or DataFrameGroupBy + is_result_dataframe = (self.ndim == 2) or is_list_like(func) + result = self._wrap_aggregation( + qc_method=type(self._query_compiler).groupby_agg, + numeric_only=False, + agg_func=func, + agg_args=args, + agg_kwargs=kwargs, + how="axis_wise", + is_result_dataframe=is_result_dataframe, + ) + return result + + agg = aggregate + + def last(self, **kwargs): + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + ErrorMessage.method_not_implemented_error(name="last", class_="GroupBy") + + def rank( + self, + method: str = "average", + ascending: bool = True, + na_option: str = "keep", + pct: bool = False, + *args, + **kwargs, + ): + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + query_compiler = self._query_compiler.groupby_rank( + by=self._by, + axis=self._axis, + method=method, + na_option=na_option, + ascending=ascending, + pct=pct, + groupby_kwargs=self._kwargs, + agg_args=args, + agg_kwargs=kwargs, + ) + if self.ndim == 1: + result = pd.Series(query_compiler=query_compiler) + else: + result = pd.DataFrame(query_compiler=query_compiler) + return result + + @property + def corrwith(self): + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + ErrorMessage.method_not_implemented_error(name="corrwith", class_="GroupBy") + + def max( + self, + numeric_only: bool = False, + min_count: int = -1, + engine: Optional[Literal["cython", "numba"]] = None, + engine_kwargs: Optional[dict[str, bool]] = None, + ): + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + WarningMessage.warning_if_engine_args_is_set( + "groupby_max", engine, engine_kwargs + ) + validate_int_kwarg(min_count, "min_count", float_allowed=False) + return self._wrap_aggregation( + qc_method=type(self._query_compiler).groupby_agg, + numeric_only=numeric_only, + agg_func="max", + agg_kwargs=dict(min_count=min_count, numeric_only=numeric_only), + ) + + def var( + self, + ddof: int = 1, + engine: Optional[Literal["cython", "numba"]] = None, + engine_kwargs: Optional[dict[str, bool]] = None, + numeric_only: bool = False, + ): + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + WarningMessage.warning_if_engine_args_is_set( + "groupby_var", engine, engine_kwargs + ) + + return self._wrap_aggregation( + qc_method=SnowflakeQueryCompiler.groupby_agg, + numeric_only=numeric_only, + agg_func="var", + agg_kwargs=dict(ddof=ddof, numeric_only=numeric_only), + ) + + def get_group(self, name, obj=None): + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + ErrorMessage.method_not_implemented_error(name="get_group", class_="GroupBy") + + def __len__(self): + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + ErrorMessage.method_not_implemented_error(name="__len__", class_="GroupBy") + + def all(self, skipna=True): + # TODO: SNOW-1063349: Modin upgrade - 
modin.pandas.groupby.DataFrameGroupBy functions + ErrorMessage.method_not_implemented_error(name="all", class_="GroupBy") + + def size(self): + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + ErrorMessage.method_not_implemented_error(name="size", class_="GroupBy") + + def sum( + self, + numeric_only: bool = False, + min_count: int = 0, + engine: Optional[Literal["cython", "numba"]] = None, + engine_kwargs: Optional[dict[str, bool]] = None, + ): + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + WarningMessage.warning_if_engine_args_is_set( + "groupby_sum", engine, engine_kwargs + ) + validate_int_kwarg(min_count, "min_count", float_allowed=False) + return self._wrap_aggregation( + qc_method=type(self._query_compiler).groupby_agg, + numeric_only=numeric_only, + agg_func="sum", + agg_kwargs=dict(min_count=min_count, numeric_only=numeric_only), + ) + + def describe(self, **kwargs): + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + ErrorMessage.method_not_implemented_error(name="describe", class_="GroupBy") + + def boxplot( + self, + grouped, + subplots=True, + column=None, + fontsize=None, + rot=0, + grid=True, + ax=None, + figsize=None, + layout=None, + **kwargs, + ): + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + ErrorMessage.method_not_implemented_error(name="boxplot", class_="GroupBy") + + def ngroup(self, ascending=True): + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + ErrorMessage.method_not_implemented_error(name="ngroup", class_="GroupBy") + + def nunique(self, dropna=True): + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + return self._wrap_aggregation( + qc_method=type(self._query_compiler).groupby_nunique, + agg_func="nunique", + agg_kwargs=dict(dropna=dropna), + ) + + def resample(self, rule, *args, **kwargs): + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + ErrorMessage.method_not_implemented_error(name="resample", class_="GroupBy") + + def sample(self, n=None, frac=None, replace=False, weights=None, random_state=None): + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + ErrorMessage.method_not_implemented_error(name="sample", class_="GroupBy") + + def median(self, numeric_only: bool = False): + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + return self._wrap_aggregation( + qc_method=type(self._query_compiler).groupby_agg, + numeric_only=numeric_only, + agg_func="median", + agg_kwargs=dict(numeric_only=numeric_only), + ) + + def head(self, n=5): + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + # Ensure that n is an integer value. + if not isinstance(n, int): + raise TypeError("n must be an integer value.") + + # Only the groupby parameter "dropna" affects the output of head. None of the other groupby + # parameters: as_index, sort, and group_keys, affect head. + # Values needed for the helper functions. 
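+        # For example (hypothetical frame), df.groupby("k", dropna=False).head(2) keeps the first
+        # two rows of every group, including the NA-key group, in the original row order.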
+ agg_kwargs = { + "n": n, + "level": self._level, + "dropna": self._kwargs.get("dropna", True), + } + + result = self._wrap_aggregation( + qc_method=type(self._query_compiler).groupby_agg, + agg_func="head", + agg_kwargs=agg_kwargs, + ) + return pd.DataFrame(result) + + def cumprod(self, axis=0, *args, **kwargs): + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + ErrorMessage.method_not_implemented_error(name="cumprod", class_="GroupBy") + + def __iter__(self): + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + return self._iter.__iter__() + + def cov(self): + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + ErrorMessage.method_not_implemented_error(name="cov", class_="GroupBy") + + def transform( + self, + func: Union[str, Callable], + *args: Any, + engine: Optional[Literal["cython", "numba"]] = None, + engine_kwargs: Optional[dict[str, bool]] = None, + **kwargs: Any, + ) -> "pd.DataFrame": + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + WarningMessage.warning_if_engine_args_is_set( + "groupby_transform", engine, engine_kwargs + ) + + # The resultant DataFrame from `transform` has an index that always matches the original + # DataFrame's index. + # Create a new groupby object so that we can tune parameters to ensure that `apply` + # returns a DataFrame with the required index (same as original DataFrame). + # + # Unlike `transform`, groupby parameters affect the result of `apply`. + # - `group_keys` controls whether the grouped column(s) are included in the index. + # `group_keys` needs to be False to ensure that the resultant DataFrame has the + # original DataFrame's index. + # + # - `dropna` controls whether the NA values should be included as a group/be present + # in the group keys. `transform` always includes the NA values, therefore `dropna` + # needs to be False to ensure that all NA values are included. + # + # - `sort` controls whether the group keys are sorted. + # + # - `as_index` controls whether the groupby object has group labels as the index. + by = self._by + level = self._level + groupby_obj = self._df.groupby( + by=by, # either by or levels can be specified at a time + level=level, + as_index=self._as_index, + group_keys=False, + dropna=False, + sort=self._sort, + ) + + # Apply the transform function to each group. + res = groupby_obj.apply( + create_groupby_transform_func(func, by, level, *args, **kwargs) + ) + + dropna = self._kwargs.get("dropna", True) + if dropna is True: + # - To avoid dropping any NA values, `dropna` is set to False in both the groupby + # object created above and the groupby object created in `create_groupby_transform_func`. + # + # - If dropna is set to True in the groupby object, the output from this code (so far) + # and the expected native pandas result differs. + # + # - In the Snowpark pandas code, all rows grouped under NA keys calculate the result with + # the given `func`, thus resulting in non-NA values. + # + # - In the native pandas version, all rows grouped under NA keys take up + # "NaN" values in all columns. + # + # Therefore, we need to convert the rows grouped under NA keys to have NaN values in + # all columns. 
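+        # Illustrative example with hypothetical data: for df = pd.DataFrame({"k": ["a", None], "v": [1, 2]}),
+        # native pandas df.groupby("k").transform("sum") leaves the row whose key is NaN entirely NaN,
+        # which is what the masking below reproduces.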
+ na_col_data = self._df[by].isna() + condition = ( + na_col_data.any(axis=1) + if isinstance(na_col_data, pd.DataFrame) + else na_col_data + ) + res.loc[condition, :] = np.nan + + return res + + def corr(self, **kwargs): + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + ErrorMessage.method_not_implemented_error(name="corr", class_="GroupBy") + + def fillna(self, *args, **kwargs): + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + ErrorMessage.method_not_implemented_error(name="fillna", class_="GroupBy") + + def count(self): + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + result = self._wrap_aggregation( + qc_method=type(self._query_compiler).groupby_agg, + numeric_only=False, + agg_func="count", + ) + return result + + def pipe(self, func, *args, **kwargs): + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + ErrorMessage.method_not_implemented_error(name="pipe", class_="GroupBy") + + def cumcount(self, ascending: bool = True): + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + query_compiler = self._query_compiler.groupby_cumcount( + self._by, self._axis, self._kwargs, ascending + ) + return pd.Series(query_compiler=query_compiler) + + def tail(self, n=5): + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + # Ensure that n is an integer value. + if not isinstance(n, int): + raise TypeError("n must be an integer value.") + + # Only the groupby parameter "dropna" affects the output of tail. None of the other groupby + # parameters: as_index, sort, and group_keys, affect tail. + # Values needed for the helper functions. + agg_kwargs = { + "n": n, + "level": self._level, + "dropna": self._kwargs.get("dropna", True), + } + + result = self._wrap_aggregation( + qc_method=type(self._query_compiler).groupby_agg, + agg_func="tail", + agg_kwargs=agg_kwargs, + ) + return pd.DataFrame(result) + + # expanding and rolling are unique cases and need to likely be handled + # separately. They do not appear to be commonly used. + def expanding(self, *args, **kwargs): + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + ErrorMessage.method_not_implemented_error(name="expanding", class_="GroupBy") + + def rolling(self, *args, **kwargs): + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + ErrorMessage.method_not_implemented_error(name="rolling", class_="GroupBy") + + def hist(self): + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + ErrorMessage.method_not_implemented_error(name="hist", class_="GroupBy") + + def quantile(self, q=0.5, interpolation="linear"): + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + return self._wrap_aggregation( + type(self._query_compiler).groupby_agg, + numeric_only=False, + agg_func="quantile", + agg_kwargs=dict(q=q, interpolation=interpolation), + ) + + def diff(self): + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + ErrorMessage.method_not_implemented_error(name="diff", class_="GroupBy") + + def take(self, *args, **kwargs): + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + ErrorMessage.method_not_implemented_error(name="take", class_="GroupBy") + + @property + def _index(self): + """ + Get index value. 
+ + Returns + ------- + pandas.Index + Index value. + """ + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + return self._query_compiler.index + + @property + def _sort(self): + """ + Get sort parameter value. + + Returns + ------- + bool + Value of sort parameter used to create DataFrameGroupBy object. + """ + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + return self._kwargs.get("sort") + + @property + def _as_index(self): + """ + Get as_index parameter value. + + Returns + ------- + bool + Value of as_index parameter used to create DataFrameGroupBy object. + """ + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + return self._kwargs.get("as_index") + + @property + def _iter(self): + """ + Construct a tuple of (group_id, DataFrame) tuples to allow iteration over groups. + + Returns + ------- + generator + Generator expression of GroupBy object broken down into tuples for iteration. + """ + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + ErrorMessage.method_not_implemented_error(name="_iter", class_="GroupBy") + + def _wrap_aggregation( + self, + qc_method: Callable, + numeric_only: bool = False, + agg_args: list[Any] = None, + agg_kwargs: dict[str, Any] = None, + is_result_dataframe: Optional[bool] = None, + **kwargs: Any, + ): + """ + Perform common metadata transformations and apply groupby functions. + + Parameters + ---------- + qc_method : callable + The query compiler method to call. + numeric_only : bool, default: False + Specifies whether to aggregate non numeric columns: + - True: include only numeric columns (including categories that holds a numeric dtype) + - False: include all columns + agg_args : list-like, optional + Positional arguments to pass to the aggregation function. + agg_kwargs : dict-like, optional + Keyword arguments to pass to the aggregation function. + is_result_dataframe: bool optional + whether the result of aggregation is a dataframe or series. If None, is_result_dataframe will be + False for SeriesGroupBy, and True for DataFrameGroupBy. + **kwargs : dict + Keyword arguments to pass to the specified query compiler's method. + + Returns + ------- + DataFrame or Series + Returns the same type as `self._df`. + """ + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + numeric_only = validate_bool_kwarg( + numeric_only, "numeric_only", none_allowed=True + ) + + agg_args = tuple() if agg_args is None else agg_args + agg_kwargs = dict() if agg_kwargs is None else agg_kwargs + + is_series_groupby = self.ndim == 1 + if is_series_groupby: + # when ndim is 1, it is SeriesGroupBy. SeriesGroupBy does not implement numeric_only + # parameter even if it accepts the parameter, and the aggregation is handled the + # same as numeric_only is False. 
+ if numeric_only and not is_numeric_dtype(self._query_compiler.dtypes[0]): + # pandas throws an NotImplementedError when the numeric_only is True, but the + # series dtype is not numeric + ErrorMessage.not_implemented( + "SeriesGroupBy does not implement numeric_only" + ) + numeric_only = False + + if is_result_dataframe is None: + is_result_dataframe = not is_series_groupby + result_type = pd.DataFrame if is_result_dataframe else pd.Series + result = result_type( + query_compiler=qc_method( + self._query_compiler, + by=self._by, + axis=self._axis, + groupby_kwargs=self._kwargs, + agg_args=agg_args, + agg_kwargs=agg_kwargs, + numeric_only=numeric_only, + is_series_groupby=is_series_groupby, + **kwargs, + ) + ) + return result + + def _check_index_name(self, result): + """ + Check the result of groupby aggregation on the need of resetting index name. + + Parameters + ---------- + result : DataFrame + Group by aggregation result. + + Returns + ------- + DataFrame + """ + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions + if self._by is not None: + # pandas does not name the index for this case + result._query_compiler.set_index_name(None) + return result + + +@_inherit_docstrings( + pandas.core.groupby.SeriesGroupBy, modify_doc=doc_replace_dataframe_with_link +) +class SeriesGroupBy(DataFrameGroupBy): + _pandas_class = pandas.core.groupby.SeriesGroupBy + + @property + def ndim(self): + """ + Return 1. + + Returns + ------- + int + Returns 1. + + Notes + ----- + Deprecated and removed in pandas and will be likely removed in Modin. + """ + # TODO: SNOW-1063350: Modin upgrade - modin.pandas.groupby.SeriesGroupBy functions + return 1 # ndim is always 1 for Series + + @property + def _iter(self): + """ + Construct a tuple of (group_id, Series) tuples to allow iteration over groups. + + Returns + ------- + generator + Generator expression of GroupBy object broken down into tuples for iteration. + """ + # TODO: SNOW-1063350: Modin upgrade - modin.pandas.groupby.SeriesGroupBy functions + ErrorMessage.method_not_implemented_error(name="_iter", class_="GroupBy") + + @property + def is_monotonic_decreasing(self): + # TODO: SNOW-1063350: Modin upgrade - modin.pandas.groupby.SeriesGroupBy functions + ErrorMessage.method_not_implemented_error( + name="is_monotonic_decreasing", class_="GroupBy" + ) + + @property + def is_monotonic_increasing(self): + # TODO: SNOW-1063350: Modin upgrade - modin.pandas.groupby.SeriesGroupBy functions + ErrorMessage.method_not_implemented_error( + name="is_monotonic_increasing", class_="GroupBy" + ) + + def aggregate( + self, + func: Optional[AggFuncType] = None, + *args: Any, + engine: Optional[Literal["cython", "numba"]] = None, + engine_kwargs: Optional[dict[str, bool]] = None, + **kwargs: Any, + ): + # TODO: SNOW-1063350: Modin upgrade - modin.pandas.groupby.SeriesGroupBy functions + if is_dict_like(func): + raise SpecificationError( + "Value for func argument in dict format is not allowed for SeriesGroupBy." 
+ ) + + return super().aggregate( + func, *args, engine=engine, engine_kwargs=engine_kwargs, **kwargs + ) + + agg = aggregate + + def nlargest(self, n=5, keep="first"): + # TODO: SNOW-1063350: Modin upgrade - modin.pandas.groupby.SeriesGroupBy functions + ErrorMessage.method_not_implemented_error(name="nlargest", class_="GroupBy") + + def nsmallest(self, n=5, keep="first"): + # TODO: SNOW-1063350: Modin upgrade - modin.pandas.groupby.SeriesGroupBy functions + ErrorMessage.method_not_implemented_error(name="nsmallest", class_="GroupBy") + + def unique(self): + # TODO: SNOW-1063350: Modin upgrade - modin.pandas.groupby.SeriesGroupBy functions + ErrorMessage.method_not_implemented_error(name="unique", class_="GroupBy") + + def apply(self, func, *args, **kwargs): + """Not implemented yet""" + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.SeriesGroupBy functions + ErrorMessage.not_implemented("apply is not implemented for SeriesGroupBy") + + +def validate_groupby_args( + by: Any, + level: Optional[IndexLabel], + observed: Union[bool, NoDefault], +) -> None: + """ + Common validation and checks for the groupby arguments that are used by both SeriesGroupBy + and DataFrameGroupBy. + + Raises: + TypeError if native pandas series is used as by item, or if both level and by are None + Warns: + If observed is True, this parameter is ignored because CategoryDType is not supported with Snowpark pandas API + """ + # TODO: SNOW-1063350: Modin upgrade - modin.pandas.groupby.SeriesGroupBy functions + # check if pandas.Series is used as by item, no native pandas series or dataframe + # object is allowed. + raise_if_native_pandas_objects(by) + if not isinstance(by, Series) and is_list_like(by): + for o in by: + raise_if_native_pandas_objects(o) + + if level is None and by is None: + raise TypeError("You have to supply one of 'by' and 'level'") + + if observed is not no_default and observed: + WarningMessage.ignored_argument( + operation="groupby", + argument="observed", + message="CategoricalDType is not yet supported with Snowpark pandas API, the observed parameter is ignored.", + ) diff --git a/src/snowflake/snowpark/modin/pandas/indexing.py b/src/snowflake/snowpark/modin/pandas/indexing.py new file mode 100644 index 00000000000..e9ffc5e471d --- /dev/null +++ b/src/snowflake/snowpark/modin/pandas/indexing.py @@ -0,0 +1,1337 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +# Code in this file may constitute partial or total reimplementation, or modification of +# existing code originally distributed by the Modin project, under the Apache License, +# Version 2.0. + +# noqa: MD02 +""" +Details about how Indexing Helper Class works. 
+
+_LocationIndexerBase provides the method framework for __getitem__ and
+__setitem__ that works with Modin DataFrame's internal index. The base
+class's __{get,set}item__ takes in partitions & idx_in_partition data and
+performs the lookup/item write.
+
+_LocIndexer and _iLocIndexer are responsible for indexer-specific logic and
+lookup computation. Loc takes care of enlarging the DataFrame. Both indexers
+take care of translating pandas' lookups to Modin DataFrame's internal
+lookups.
+
+An illustration is available at
+https://github.com/ray-project/ray/pull/1955#issuecomment-386781826
+"""
+
+import itertools
+import numbers
+from typing import Any, Callable, Optional, Union
+
+import numpy as np
+import pandas
+from pandas._libs.tslibs import Resolution, parsing
+from pandas._typing import AnyArrayLike, Scalar
+from pandas.api.types import is_bool, is_list_like
+from pandas.core.dtypes.common import (
+    is_bool_dtype,
+    is_datetime64_any_dtype,
+    is_integer,
+    is_integer_dtype,
+    is_numeric_dtype,
+    pandas_dtype,
+)
+from pandas.core.indexing import IndexingError
+
+import snowflake.snowpark.modin.pandas as pd
+import snowflake.snowpark.modin.pandas.utils as frontend_utils
+from snowflake.snowpark.modin.pandas.base import BasePandasDataset
+from snowflake.snowpark.modin.pandas.dataframe import DataFrame
+from snowflake.snowpark.modin.pandas.series import (
+    SERIES_SETITEM_LIST_LIKE_KEY_AND_RANGE_LIKE_VALUE_ERROR_MESSAGE,
+    SERIES_SETITEM_SLICE_AS_SCALAR_VALUE_ERROR_MESSAGE,
+    Series,
+)
+from snowflake.snowpark.modin.pandas.utils import is_scalar
+from snowflake.snowpark.modin.plugin._internal.indexing_utils import (
+    MULTIPLE_ELLIPSIS_INDEXING_ERROR_MESSAGE,
+    TOO_MANY_INDEXERS_INDEXING_ERROR_MESSAGE,
+)
+from snowflake.snowpark.modin.plugin.compiler.snowflake_query_compiler import (
+    SnowflakeQueryCompiler,
+)
+from snowflake.snowpark.modin.plugin.utils.error_message import ErrorMessage
+
+INDEXING_KEY_TYPE = Union[Scalar, list, slice, Callable, tuple, AnyArrayLike]
+INDEXING_ITEM_TYPE = Union[Scalar, AnyArrayLike, pd.Series, pd.DataFrame]
+INDEXING_LOCATOR_TYPE = Union[Scalar, list, slice, tuple, pd.Series]
+
+ILOC_SET_INDICES_MUST_BE_INTEGER_OR_BOOL_ERROR_MESSAGE = (
+    "arrays used as indices must be of integer (or boolean) type"
+)
+ILOC_GET_REQUIRES_NUMERIC_INDEXERS_ERROR_MESSAGE = (
+    ".iloc requires numeric indexers, got {}"
+)
+LOC_SET_INCOMPATIBLE_INDEXER_WITH_DF_ERROR_MESSAGE = (
+    "Incompatible indexer with DataFrame"
+)
+LOC_SET_INCOMPATIBLE_INDEXER_WITH_SERIES_ERROR_MESSAGE = (
+    "Incompatible indexer with Series"
+)
+LOC_SET_INCOMPATIBLE_INDEXER_WITH_SCALAR_ERROR_MESSAGE = (
+    "Scalar indexer incompatible with {} item"
+)
+SET_CELL_WITH_LIST_LIKE_VALUE_ERROR_MESSAGE = (
+    "Currently do not support setting cell with list-like values"
+)
+
+
+ILOC_GET_DATAFRAME_INDEXER_NOT_ALLOWED_ERROR_MESSAGE = (
+    "DataFrame indexer is not allowed for .iloc\nConsider using"
+    " .loc for automatic alignment."
+)
+
+
+def is_boolean_array(x: Any) -> bool:
+    """
+    Check that argument is an array of bool.
+
+    Parameters
+    ----------
+    x : object
+        Object to check.
+
+    Returns
+    -------
+    bool
+        True if argument is an array of bool, False otherwise.
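+
+    Examples
+    --------
+    Illustrative sketch (not exercised as a doctest):
+
+    >>> is_boolean_array([True, False, True])  # doctest: +SKIP
+    True
+    >>> is_boolean_array([1, 0, 1])  # doctest: +SKIP
+    False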
+ """ + + # special case empty list is not regarded as boolean array; + # because of later Numpy versions (for Python 3.9+), can't + # compare directly to [], but need workaround to detect list properly + if isinstance(x, list) and 0 == len(x): + return False + + if isinstance(x, (np.ndarray, Series, pandas.Series, pandas.Index)): + # check dtype, if != object, no need to perform element-wise check + if pandas_dtype(x.dtype) != pandas_dtype("object"): + return is_bool_dtype(x.dtype) + elif isinstance(x, (DataFrame, pandas.DataFrame)): + return all(map(is_bool_dtype, x.dtypes)) + return is_list_like(x) and all(map(is_bool, x)) + + +def is_2d_array(x: Any) -> bool: + """ + Check that argument is a 2D array. + + Parameters + ---------- + x : object + Object to check. + + Returns + ------- + bool + True if argument is a 2D array, False otherwise. + """ + return isinstance(x, (list, np.ndarray)) and len(x) > 0 and is_list_like(x[0]) + + +def is_range_like(obj: Any) -> bool: + """ + Check if the object is range-like. + + Objects that are considered range-like have information about the range (start and + stop positions, and step) and also have to be iterable. Examples of range-like + objects are: Python range, pandas.RangeIndex. + + Parameters + ---------- + obj : object + + Returns + ------- + bool + """ + if not isinstance(obj, (DataFrame, Series)): + return ( + hasattr(obj, "__iter__") + and hasattr(obj, "start") + and hasattr(obj, "stop") + and hasattr(obj, "step") + ) + else: + # This would potentially have to change once RangeIndex is supported + return False + + +def boolean_mask_to_numeric(indexer: Any) -> np.ndarray: + """ + Convert boolean mask to numeric indices. + + Parameters + ---------- + indexer : list-like of booleans + + Returns + ------- + np.ndarray of ints + Numerical positions of ``True`` elements in the passed `indexer`. + """ + if isinstance(indexer, (np.ndarray, Series, pandas.Series)): + return np.where(indexer)[0] + else: + # It's faster to build the resulting numpy array from the reduced amount of data via + # `compress` iterator than convert non-numpy-like `indexer` to numpy and apply `np.where`. + return np.fromiter( + # `itertools.compress` masks `data` with the `selectors` mask, + # works about ~10% faster than a pure list comprehension + itertools.compress(data=range(len(indexer)), selectors=indexer), + dtype=np.int64, + ) + + +def check_dict_or_set_indexers(key: Any) -> None: + """ + Check if the indexer is or contains a dict or set, which is no longer allowed since pandas 2.0. + Our error messages and types are the same as pandas 2.0. + + Raises + ---------- + TypeError: + If key is set or dict type or a tuple with any set or dict type item. + """ + if ( + isinstance(key, set) + or isinstance(key, tuple) + and any(isinstance(x, set) for x in key) + ): + raise TypeError( + "Passing a set as an indexer is not supported. Use a list instead." + ) + + if ( + isinstance(key, dict) + or isinstance(key, tuple) + and any(isinstance(x, dict) for x in key) + ): + raise TypeError( + "Passing a dict as an indexer is not supported. Use a list instead." + ) + + +def validate_positional_slice(slice_key: Any) -> None: + """ + Validate slice start, stop, and step are int typed. + + Parameters + ---------- + slice_key : slice or is_range_like + + Raises + ---------- + TypeError: + If the start, stop, or step of slice_key is not None and is not integer. 
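+
+    Examples
+    --------
+    Illustrative sketch (not exercised as a doctest):
+
+    >>> validate_positional_slice(slice(0, 10, 2))  # doctest: +SKIP
+    >>> validate_positional_slice(slice("a", "c"))  # doctest: +SKIP
+    Traceback (most recent call last):
+    ...
+    TypeError: cannot do positional indexing with these indexers [a] of type str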
+    """
+    for key in [slice_key.start, slice_key.stop, slice_key.step]:
+        if key is not None and not is_integer(key):
+            raise TypeError(
+                f"cannot do positional indexing with these indexers [{key}] of type {type(key).__name__}"
+            )
+
+
+class _LocationIndexerBase:
+    """
+    Base class for location indexers like loc and iloc.
+
+    Parameters
+    ----------
+    modin_df : modin.pandas.DataFrame
+        DataFrame to operate on.
+    """
+
+    api_name = "undefined"
+
+    def __init__(self, modin_df: BasePandasDataset) -> None:
+        # TODO: SNOW-1063351: Modin upgrade - modin.pandas.indexing._LocationIndexerBase
+        self.df = modin_df
+        self.qc = modin_df._query_compiler
+
+    def _validate_key_length_with_ellipsis_stripping(self, key: tuple) -> tuple:
+        """
+        Validate tuple type key's length and strip leading ellipsis.
+
+        If tuple length is no greater than ndim of DataFrame df: return key
+        Else:
+            If the first entry is ellipsis, strip leading ellipsis and call this function
+            on the remaining tuple again.
+            Else raise IndexingError.
+
+        e.g. (..., 2, 3) is reduced to (2, 3); (..., 3) is reduced to (3,)
+        """
+        if len(key) > self.df.ndim:
+            if key[0] is Ellipsis:
+                # e.g. Series.iloc[..., 3] reduces to just Series.iloc[3]
+                key = key[1:]
+                if Ellipsis in key:
+                    raise IndexingError(MULTIPLE_ELLIPSIS_INDEXING_ERROR_MESSAGE)
+                return self._validate_key_length_with_ellipsis_stripping(key)
+            raise IndexingError(TOO_MANY_INDEXERS_INDEXING_ERROR_MESSAGE)
+        return key
+
+    def __getitem__(self, key: INDEXING_KEY_TYPE) -> None:  # pragma: no cover
+        """
+        Retrieve dataset according to `key`.
+
+        Parameters
+        ----------
+        key : callable, scalar, or tuple
+            The global row index to retrieve data from.
+
+        Returns
+        -------
+        modin.pandas.DataFrame or modin.pandas.Series
+            Located dataset.
+
+        See Also
+        --------
+        pandas.DataFrame.loc
+        """
+        # TODO: SNOW-1063351: Modin upgrade - modin.pandas.indexing._LocationIndexerBase
+        ErrorMessage.not_implemented("Implemented by subclasses")
+
+    def __setitem__(
+        self, key: INDEXING_KEY_TYPE, item: INDEXING_ITEM_TYPE
+    ) -> None:  # pragma: no cover
+        """
+        Assign `item` value to dataset located by `key`.
+
+        Parameters
+        ----------
+        key : callable or tuple
+            The global row numbers to assign data to.
+        item : modin.pandas.DataFrame, modin.pandas.Series or scalar
+            Value that should be assigned to located dataset.
+
+        See Also
+        --------
+        pandas.DataFrame.iloc
+        """
+        # TODO: SNOW-1063351: Modin upgrade - modin.pandas.indexing._LocationIndexerBase
+        ErrorMessage.not_implemented("Implemented by subclasses")
+
+    def _should_squeeze(
+        self,
+        locator: Union[Scalar, list, slice, tuple, pd.Series],
+        axis: int,
+    ) -> Optional[bool]:
+        """
+        The method helps to make the decision whether squeeze is needed to get the final pandas object.
+        Specifically, squeeze is not needed:
+        - if self is a Series and axis = 1
+        - if the locator is neither a scalar nor a tuple
+        Otherwise, the decision cannot be made yet (return None).
+
+        Args:
+            locator: locator on the axis
+            axis: the axis to check
+
+        Returns:
+            False if squeeze is definitely not needed on this axis, or None if the decision cannot
+            be made from the locator alone.
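+
+        Example (illustrative): df.loc["a"] passes a scalar row locator, so squeezing the row axis
+        stays undecided here (None), while df.loc[["a"]] passes a list locator and is never
+        squeezed (False).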
+ """ + # TODO: SNOW-1063351: Modin upgrade - modin.pandas.indexing._LocationIndexerBase + if axis == 1 and isinstance(self.df, Series): + # squeeze col is always False for Series + return False + + not_dataset = not isinstance(locator, BasePandasDataset) + is_scalar_loc = not_dataset and is_scalar(locator) + is_tuple_loc = not_dataset and isinstance(locator, tuple) + + if not is_scalar_loc and not is_tuple_loc: + # no need to squeeze if any axis key are not scalar or tuple + return False + + # otherwise, not sure + return None + + def _get_pandas_object_from_qc_view( + self, + qc_view: SnowflakeQueryCompiler, + *, + squeeze_row: bool, + squeeze_col: bool, + ) -> Union[Scalar, pd.Series, pd.DataFrame]: + """ + Convert the query compiler view to the appropriate pandas object. The method helps to call squeeze to get the + final pandas object. + Args: + qc_view: SnowflakeQueryCompiler + Query compiler to convert. + squeeze_row: bool + Whether to squeeze row + squeeze_col: bool + Whether to squeeze column + + Returns: DataFrame, Series or Scalar + The pandas object with the data from the query compiler view. + """ + # TODO: SNOW-1063351: Modin upgrade - modin.pandas.indexing._LocationIndexerBase + res_df = self.df.__constructor__(query_compiler=qc_view) + + if not squeeze_row and not squeeze_col: + return res_df + + if squeeze_row and squeeze_col: + axis = None + elif squeeze_row: + axis = 0 + else: + axis = 1 + return res_df.squeeze(axis=axis) + + def _parse_row_and_column_locators( + self, key: INDEXING_KEY_TYPE + ) -> tuple[INDEXING_LOCATOR_TYPE, INDEXING_LOCATOR_TYPE]: + """ + Unpack the user input. This shared parsing helper method is used by both iloc and loc's getitem and setitem. + + Examples: + loc[:] -> (slice(None), slice(None)) + loc[a] -> (a, slice(None)) + loc[,b] -> (slice(None), b) + loc[a,:] -> (a, slice(None)) + loc[:,b] -> (slice(None), b) + loc[a,...] -> (a, slice(None)) + loc[...,b] -> (slice(None), b) + loc[[a,b]] -> ([a,b], slice(None)), + loc[a,b] -> ([a], [b]) + loc[...,a,b] -> ([a], [b]) + loc[lambda df: df.col > 0,b] -> (df.col > 0, [b]) + (same for iloc too) + + Args: + key: User input to unpack. + + Returns: + row_loc : scalar or list + Row locator(s) as a scalar or list. + col_loc : scalar or list + Column locator(s) as a scalar or list. + + Raises: + index error if key is tuple(...,...) + """ + # TODO: SNOW-1063351: Modin upgrade - modin.pandas.indexing._LocationIndexerBase + row_loc: INDEXING_LOCATOR_TYPE = slice(None) + col_loc: INDEXING_LOCATOR_TYPE = slice(None) + if isinstance(key, tuple): + key = self._validate_key_length_with_ellipsis_stripping(key) + if len(key) > 2: + raise IndexingError(TOO_MANY_INDEXERS_INDEXING_ERROR_MESSAGE) + if len(key) > 0: + row_loc = key[0] + if len(key) == 2: + if key[0] is Ellipsis and key[1] is Ellipsis: + raise IndexingError(MULTIPLE_ELLIPSIS_INDEXING_ERROR_MESSAGE) + col_loc = key[1] + else: + row_loc = key + + def _parse_locator(_key: INDEXING_LOCATOR_TYPE) -> INDEXING_LOCATOR_TYPE: + # Ellipsis to slice(None) + if _key is Ellipsis: + return slice(None) + # callable will be evaluated to use the result as locator + if callable(_key): + _key = _key(self.df) + return _key + + return _parse_locator(row_loc), _parse_locator(col_loc) + + def _parse_get_row_and_column_locators( + self, key: INDEXING_KEY_TYPE + ) -> tuple[ + Union[Scalar, list, slice, tuple, pd.Series], + Union[Scalar, list, slice, tuple, pd.Series], + ]: + """Used by loc and iloc. 
See _LocationIndexerBase._parse_row_and_column_locators""" + # TODO: SNOW-1063351: Modin upgrade - modin.pandas.indexing._LocationIndexerBase + row_key, col_key = self._parse_row_and_column_locators(key) + self._validate_get_locator_key(row_key) + self._validate_get_locator_key(col_key) + + return row_key, col_key + + def _parse_set_row_and_column_locators( + self, key: INDEXING_KEY_TYPE + ) -> tuple[ + Union[Scalar, list, slice, tuple, pd.Series], + Union[Scalar, list, slice, tuple, pd.Series], + ]: + """Used by loc and iloc. See _LocationIndexerBase._parse_row_and_column_locators""" + # TODO: SNOW-1063351: Modin upgrade - modin.pandas.indexing._LocationIndexerBase + row_key, col_key = self._parse_row_and_column_locators(key) + self._validate_set_locator_key(row_key) + self._validate_set_locator_key(col_key) + + return row_key, col_key + + def _is_multiindex_full_lookup( + self, axis: int, key: Union[Scalar, list, slice, tuple, pd.Series] + ) -> bool: + """ + Determine if the key will perform a full lookup for MultiIndex. "Multiindex full lookup" is True only when the + axis is MultiIndex and the key is a tuple and the number of levels matches up with the length of the tuple key. + When it is True, pandas will drop all levels from the multiindex axis and call squeeze on the axis. + + Examples: + if self has a three level multiindex ["l0","l1","l2], then key has to be a tuple with length equals to 3 to + perform a multiindex full lookup. + + Args: + axis: {0, 1} + 0 for row, 1 for column. + key: Scalar, tuple, or other list like + Lookup key for MultiIndex row/column. + + Returns: bool + True if the key will perform a full lookup for the MultiIndex. + + """ + # TODO: SNOW-1063351: Modin upgrade - modin.pandas.indexing._LocationIndexerBase + if not self.qc.has_multiindex(axis=axis): + return False + + if not isinstance(key, tuple): + return False + + if any(isinstance(key_level, slice) for key_level in key): + # do not squeeze if any level of the key is a slice + return False + + return len(key) == self.qc.nlevels(axis) + + def _validate_locator_key(self, key: INDEXING_KEY_TYPE) -> None: + """Validate indexing key type. + + Parameters + ---------- + key: indexing key + + Raises + ------ + TypeError: + native pandas object. + set or dict. + all other types out of scalar, list like, slice, series, or, index. + For iloc, raise if scalar is not integer + IndexingError: + tuple. + ValueError: + SnowDataFrame. + """ + # TODO: SNOW-1063351: Modin upgrade - modin.pandas.indexing._LocationIndexerBase + frontend_utils.raise_if_native_pandas_objects(key) + check_dict_or_set_indexers(key) + + if not ( + is_scalar(key) + or isinstance(key, (pd.Series, slice)) + or is_list_like(key) + or is_range_like(key) + ): + raise TypeError( + f".{self.api_name} requires scalars, list-like indexers, slices, or ranges. Got {key}" + ) + + def _validate_get_locator_key(self, key: INDEXING_KEY_TYPE) -> None: + """ + Helper function to validate the locator key for get is valid. + + Parameter: + ---------- + key: get locator key + + """ + # TODO: SNOW-1063351: Modin upgrade - modin.pandas.indexing._LocationIndexerBase + self._validate_locator_key(key) + + def _validate_set_locator_key(self, key: INDEXING_KEY_TYPE) -> None: + """ + Helper function to validate the locator key for set is valid. 
+ + Parameter: + ---------- + key: set locator key + + """ + # TODO: SNOW-1063351: Modin upgrade - modin.pandas.indexing._LocationIndexerBase + self._validate_locator_key(key) + + +class _LocIndexer(_LocationIndexerBase): + """ + An indexer for modin_df.loc[] functionality. + + Parameters + ---------- + modin_df : modin.pandas.DataFrame + DataFrame to operate on. + """ + + api_name = "loc" + + def _should_squeeze( + self, + locator: Union[Scalar, list, slice, tuple, pd.Series], + axis: int, + ) -> bool: + """ + The method helps to make the decision whether squeeze is needed to get the final pandas object. Specifically, + squeeze is needed: + - if self is series and axis = 1 + - if the locator are not scalar and tuple + - if the locator is scalar but on a multiindex + - if it is a multiindex full lookup, i.e., an exact match on the multiindex + + Args: + locator: locator on the axis + axis: the axis to check + + Returns: + A tuple of boolean values to indicate whether to squeeze on the two axis. + """ + # TODO: SNOW-1063352: Modin upgrade - modin.pandas.indexing._LocIndexer + do_squeeze = super()._should_squeeze(locator, axis) + if do_squeeze is not None: + return do_squeeze + + not_dataset = not isinstance(locator, BasePandasDataset) + is_scalar_loc = not_dataset and is_scalar(locator) + is_tuple_loc = not_dataset and isinstance(locator, tuple) + + if (is_scalar_loc or is_tuple_loc) and not self.qc.is_multiindex(axis=axis): + # for single index, if the locator is scalar or tuple, then squeeze is needed + return True + + if self._is_multiindex_full_lookup(axis=axis, key=locator): + # for multiindex, squeeze is needed only when full lookup happens, i.e., exact match on all levels. + return True + + # otherwise, no squeeze is needed + return False + + def _parse_row_and_column_locators( + self, key: INDEXING_KEY_TYPE + ) -> tuple[ + Union[Scalar, list, slice, tuple, pd.Series], + Union[Scalar, list, slice, tuple, pd.Series], + ]: + """ + Unpack the user input. This shared parsing helper method is used by both iloc and loc's getitem and setitem. + + Examples: + loc[:] -> (slice(None), slice(None)) + loc[a] -> (a, slice(None)) + loc[,b] -> (slice(None), b) + loc[a,:] -> (a, slice(None)) + loc[:,b] -> (slice(None), b) + loc[a,...] -> (a, slice(None)) + loc[...,b] -> (slice(None), b) + loc[[a,b]] -> ([a,b], slice(None)), + loc[a,b] -> ([a], [b]) + loc[...,a,b] -> ([a], [b]) + loc[lambda df: df.col > 0,b] -> (df.col > 0, [b]) + Also, for multiindex cases used by loc: + loc[("level0", "level1")] -> (("level0", "level1"), slice(None)) + + Args: + key: User input to unpack. + + Returns: + row_loc : scalar or list + Row locator(s) as a scalar or list. + col_loc : scalar or list + Column locator(s) as a scalar or list. + + Raises: + index error if key is tuple(...,...) 
+ """ + # TODO: SNOW-1063352: Modin upgrade - modin.pandas.indexing._LocIndexer + if isinstance(key, tuple): + is_nested_tuple = any([not is_scalar(k) for k in key]) + if ( + self.qc.is_multiindex(axis=0) + and not is_nested_tuple + and not (self.df.ndim == 2 and self.qc.is_multiindex(axis=1)) + ): + # always treat tuple loc key as row_loc when the key is not nested tuple and the frame is a Series or + # the frame's column is not multiindex + # e.g., df.loc['cobra', 'mark i'], key = ('cobra', 'mark i') should be treated as row_loc if the row is + # multiindex or the frame is a Series + row_loc = key + if len(row_loc) > self.qc.nlevels(axis=0): + raise IndexingError(TOO_MANY_INDEXERS_INDEXING_ERROR_MESSAGE) + return row_loc, slice(None) + + return super()._parse_row_and_column_locators(key) + + def _locator_type_convert( + self, locator: INDEXING_LOCATOR_TYPE + ) -> Union[INDEXING_LOCATOR_TYPE, "SnowflakeQueryCompiler"]: + """ + A helper function to convert locator type before passing to the backend + Args: + locator: row or column locator + + Returns: + Processed locator + """ + # TODO: SNOW-1063352: Modin upgrade - modin.pandas.indexing._LocIndexer + if isinstance(locator, pd.Series): + locator = locator._query_compiler + elif not isinstance(locator, slice) and is_range_like(locator): + locator = slice(locator.start, locator.stop, locator.step) # type: ignore[union-attr] + return locator + + def _try_partial_string_indexing( + self, row_loc: Union[Scalar, list, slice, tuple, pd.Series] + ) -> Union[Scalar, list, slice, tuple, pd.Series]: + """ + Try to convert row locator to slice if it matches partial string indexing criteria: + 1. `row_loc` needs to be a valid datetime string + 2. the index is datetime type + + Args: + row_loc: the original row locator + + Returns: + the new row locator for partial string indexing; otherwise, the original row locator + """ + # TODO: SNOW-1063352: Modin upgrade - modin.pandas.indexing._LocIndexer + + def _try_partial_string_indexing_for_string( + row_loc: str, + ) -> Union[Scalar, list, slice, tuple, pd.Series]: + """ + Convert string `row_loc` into slice if it matches the partial string indexing criteria. Otherwise, return + the original `row_loc`. 
+ + Args: + row_loc: input + + Returns: + slice or the original `row_loc` + """ + # TODO: SNOW-1063352: Modin upgrade - modin.pandas.indexing._LocIndexer + try: + parsed, reso_str = parsing.parse_datetime_string_with_reso(row_loc) + except ValueError: + return row_loc + + # extract tzinfo first since Period will drop tzinfo later; then the tzinfo will be added back when + # assembling the final slice + tzinfo = parsed.tzinfo + reso = Resolution.from_attrname(reso_str) + period = pd.Period(parsed, freq=reso.attr_abbrev) + + # partial string indexing only works for DatetimeIndex + if is_datetime64_any_dtype(self.df._query_compiler.index_dtypes[0]): + return slice( + pd.Timestamp(period.start_time, tzinfo=tzinfo), + pd.Timestamp(period.end_time, tzinfo=tzinfo), + ) + + return row_loc + + if isinstance(row_loc, str): + return _try_partial_string_indexing_for_string(row_loc) + + if isinstance(row_loc, slice): + start, stop = row_loc.start, row_loc.stop + if isinstance(row_loc.start, str): + start = _try_partial_string_indexing_for_string(row_loc.start) + if isinstance(start, slice): + start = start.start + if isinstance(row_loc.stop, str): + stop = _try_partial_string_indexing_for_string(row_loc.stop) + if isinstance(stop, slice): + stop = stop.stop + # partial string indexing only updates start and stop, and should keep using the original step. + row_loc = slice(start, stop, row_loc.step) + + return row_loc + + def __getitem__( + self, key: INDEXING_KEY_TYPE + ) -> Union[Scalar, pd.Series, pd.DataFrame]: + """ + Retrieve dataset according to `key`. + + Parameters + ---------- + key : callable, scalar, list-like, boolean mask, Snowpark pandas Series, slice, or size-two tuple of these + The 2D locator. + + Returns + ------- + modin.pandas.DataFrame or modin.pandas.Series + Located dataset. + + See Also + -------- + pandas.DataFrame.loc + """ + # TODO: SNOW-1063352: Modin upgrade - modin.pandas.indexing._LocIndexer + row_loc, col_loc = self._parse_get_row_and_column_locators(key) + row_loc = self._try_partial_string_indexing(row_loc) + squeeze_row, squeeze_col = self._should_squeeze( + locator=row_loc, axis=0 + ), self._should_squeeze(locator=col_loc, axis=1) + + qc_view = self.qc.take_2d_labels( + self._locator_type_convert(row_loc), self._locator_type_convert(col_loc) + ) + + result = self._get_pandas_object_from_qc_view( + qc_view, squeeze_row=squeeze_row, squeeze_col=squeeze_col + ) + if isinstance(result, Series): + result._parent = self.df + result._parent_axis = 0 + + return result + + def _loc_set_matching_item_columns_by_label( + self, key: INDEXING_KEY_TYPE, item: INDEXING_ITEM_TYPE + ) -> bool: + """ + Decide whether loc set behavior is to match item columns by label or by position. + Note: loc set's behavior is different when key is a tuple of row and col keys vs. key is a row key only. When + key is tuple (e.g., df.loc[row_key, col_key] = item), only ``item``'s column labels that match with col_key are + used to set df values; otherwise, (e.g., df.loc[row_key_only] = item), loc set columns based on ``item``'s + column positions not labels. E.g., df has columns ["A", "B", "C"] and item has columns ["C", "B", "A"], + df.loc[:] = item will update df's columns "A", "B", "C" using item column "C", "B", "A" respectively. + TODO: SNOW-972417 pandas has some complicated logic to use dtypes from both self df and item to decide whether + the loc set behavior for df.loc[row_key, col_key] = item) is matching by label or not. 
Further effort is needed + to decide what the right behavior for Snowpark pandas. + + Args: + key: loc key + item: the RHS in loc set + + Returns: + True if matching item by label + """ + # TODO: SNOW-1063352: Modin upgrade - modin.pandas.indexing._LocIndexer + if is_2d_array(item): + return False + return ( + isinstance(self.df, pd.DataFrame) + and isinstance(key, tuple) + and not is_scalar( + key[1] + ) # e.g., df.loc[:, 'A'] = item is matching item by position + and isinstance(item, pd.DataFrame) + ) + + def __setitem__( + self, + key: INDEXING_KEY_TYPE, + item: INDEXING_ITEM_TYPE, + ) -> None: + """ + Assign `item` value to dataset located by label `key`. + + Args: + key: indexing key type + item: indexing item type + + See Also: + DataFrame.loc + """ + # TODO: SNOW-1063352: Modin upgrade - modin.pandas.indexing._LocIndexer + row_loc, col_loc = self._parse_row_and_column_locators(key) + + # TODO SNOW-962260 support multiindex + if self.qc.is_multiindex(axis=0) or self.qc.is_multiindex(axis=1): + ErrorMessage.not_implemented( + "loc set for multiindex is not yet implemented" + ) + + self._validate_item_type(item, row_loc) + + # If the row key is list-like (Index, list, np.ndarray, etc.), convert it to Series. + if not isinstance(row_loc, pd.Series) and is_list_like(row_loc): + row_loc = pd.Series(row_loc) + + matching_item_columns_by_label = self._loc_set_matching_item_columns_by_label( + key, item + ) + item_is_2d_array = is_2d_array(item) + matching_item_rows_by_label = not item_is_2d_array + + index_is_bool_indexer = isinstance( + row_loc, BasePandasDataset + ) and is_bool_dtype(row_loc.dtypes) + + index = ( + row_loc._query_compiler + if isinstance(row_loc, BasePandasDataset) + else row_loc + ) + columns = ( + col_loc._query_compiler + if isinstance(col_loc, BasePandasDataset) + else col_loc + ) + if item_is_2d_array: + item = pd.DataFrame(item) + item = item._query_compiler if isinstance(item, BasePandasDataset) else item + new_qc = self.qc.set_2d_labels( + index, + columns, + item, + matching_item_columns_by_label=matching_item_columns_by_label, + matching_item_rows_by_label=matching_item_rows_by_label, + index_is_bool_indexer=index_is_bool_indexer, + ) + + self.df._update_inplace(new_query_compiler=new_qc) + + def _validate_locator_key(self, key: INDEXING_KEY_TYPE) -> None: + """Used by loc. See LocationIndexerBase._validate_locator_key""" + # TODO: SNOW-1063352: Modin upgrade - modin.pandas.indexing._LocIndexer + super()._validate_locator_key(key) + if isinstance(key, pd.DataFrame): + raise ValueError("Cannot index with multidimensional key") + + def _validate_item_type( + self, + item: INDEXING_ITEM_TYPE, + row_loc: Union[Scalar, list, slice, tuple, AnyArrayLike], + ) -> None: + """ + Validate item data type for loc set. Raise error if the type is invalid. 
+ Args: + item: the item to set + row_loc: row locator + + Returns: + None + """ + # TODO: SNOW-1063352: Modin upgrade - modin.pandas.indexing._LocIndexer + frontend_utils.raise_if_native_pandas_objects(item) + + if isinstance(self.df, pd.Series): + if isinstance(item, pd.DataFrame): + raise ValueError(LOC_SET_INCOMPATIBLE_INDEXER_WITH_DF_ERROR_MESSAGE) + elif is_scalar(row_loc) and ( + isinstance(item, pd.Series) or is_list_like(item) + ): + ErrorMessage.not_implemented( + SET_CELL_WITH_LIST_LIKE_VALUE_ERROR_MESSAGE + ) + else: + if is_scalar(row_loc) and ( + isinstance(item, pd.DataFrame) or is_2d_array(item) + ): + raise ValueError( + LOC_SET_INCOMPATIBLE_INDEXER_WITH_SCALAR_ERROR_MESSAGE.format( + item.__class__.__name__ + ) + ) + + if (isinstance(row_loc, pd.Series) or is_list_like(row_loc)) and ( + isinstance(item, range) + ): + ErrorMessage.not_implemented( + SERIES_SETITEM_LIST_LIKE_KEY_AND_RANGE_LIKE_VALUE_ERROR_MESSAGE + ) + + if isinstance(item, slice): + # Here, the whole slice is assigned as a scalar variable, i.e., a spot at an index gets a slice value. + ErrorMessage.not_implemented( + SERIES_SETITEM_SLICE_AS_SCALAR_VALUE_ERROR_MESSAGE + ) + + +class _iLocIndexer(_LocationIndexerBase): + """ + An indexer for modin_df.iloc[] functionality. + + Parameters + ---------- + modin_df : modin.pandas.DataFrame + DataFrame to operate on. + """ + + api_name = "iloc" + + def _should_squeeze( + self, + locator: Union[Scalar, list, slice, tuple, pd.Series], + axis: int, + ) -> bool: + """ + The method helps to make the decision whether squeeze is needed to get the final pandas object. Specifically, + squeeze is needed: + - if self is series and axis = 1 + - if the locator are not scalar and tuple + - if the locator is scalar + + Args: + locator: locator on the axis + axis: the axis to check + + Returns: + A tuple of boolean values to indicate whether to squeeze on the two axis. + """ + # TODO: SNOW-1063355: Modin upgrade - modin.pandas.indexing._iLocIndexer + do_squeeze = super()._should_squeeze(locator, axis) + if do_squeeze is not None: + return do_squeeze + + not_dataset = not isinstance(locator, BasePandasDataset) + is_scalar_loc = not_dataset and is_scalar(locator) + if is_scalar_loc: + return True + + # otherwise, no squeeze is needed + return False + + @staticmethod + def _convert_range_to_valid_slice(range_key: Any) -> slice: + # TODO: SNOW-1063355: Modin upgrade - modin.pandas.indexing._iLocIndexer + start, stop, step = range_key.start, range_key.stop, range_key.step + # range has different logic from slice: slice can handle cases where (start > stop and step > 0) + # and (start < stop and step < 0) but range has an empty result for this. For example, slice(3, -1, 1) + # and slice(-1, 0, -1) are not empty results but range is. + if (start > stop and step > 0) or (start < stop and step < 0): + return slice(0, 0, 1) + else: + return slice(start, stop, step) + + def __getitem__( + self, + key: INDEXING_KEY_TYPE, + ) -> Union[Scalar, pd.DataFrame, pd.Series]: + """ + Retrieve dataset according to positional `key`. + + Args: + key: int, bool, list like of int or bool, slice of int, series, callable or tuple + The global row numbers to retrieve data from. + + Returns: + DataFrame, Series, or scalar. 
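+
+ Examples (illustrative sketch; ``df`` is a hypothetical Snowpark pandas DataFrame):
+ df.iloc[0]          # first row, squeezed into a Series
+ df.iloc[[0, 2], 1]  # rows 0 and 2 of the second column, as a Series
+ df.iloc[:, 0:2]     # all rows of the first two columns, as a DataFrame
+ df.iloc[0, 0]       # a single scalar value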
+ """ + # TODO: SNOW-1063355: Modin upgrade - modin.pandas.indexing._iLocIndexer + row_loc, col_loc = self._parse_get_row_and_column_locators(key) + squeeze_row = self._should_squeeze(locator=row_loc, axis=0) + squeeze_col = self._should_squeeze(locator=col_loc, axis=1) + + original_row_loc = row_loc # keep a copy for error message + + # Convert range to slice objects. + if not isinstance(row_loc, pd.Series) and is_range_like(row_loc): + row_loc = self._convert_range_to_valid_slice(row_loc) + if not isinstance(col_loc, pd.Series) and is_range_like(col_loc): + col_loc = self._convert_range_to_valid_slice(col_loc) + + # Convert all scalar, list-like, and indexer row_loc to a Series object to get a query compiler object. + if is_scalar(row_loc): + row_loc = pd.Series([row_loc]) + elif is_list_like(row_loc): + if hasattr(row_loc, "dtype"): + dtype = row_loc.dtype + elif not row_loc: + # If the list-like object is empty, we need to explicitly specify a dtype + dtype = float + else: + dtype = None + row_loc = pd.Series(row_loc, dtype=dtype) + + # Check whether the row and column input is of numeric dtype. + self._validate_numeric_get_key_values(row_loc, original_row_loc) + self._validate_numeric_get_key_values(col_loc) + + if isinstance(row_loc, pd.Series): + # Get the corresponding query compiler object. + row_loc = row_loc._query_compiler + + if isinstance(col_loc, pd.Series): + col_loc = col_loc.to_list() + + qc_view = self.qc.take_2d_positional(row_loc, col_loc) + result = self._get_pandas_object_from_qc_view( + qc_view, + squeeze_row=squeeze_row, + squeeze_col=squeeze_col, + ) + + if isinstance(result, Series): + result._parent = self.df + result._parent_axis = 0 + return result + + def _get_pandas_object_from_qc_view( + self, + qc_view: SnowflakeQueryCompiler, + *, + squeeze_row: bool, + squeeze_col: bool, + ) -> Union[Scalar, list, pd.Series, pd.DataFrame]: + """ + Convert the query compiler view to the appropriate pandas object. + + Args: + qc_view: SnowflakeQueryCompiler + Query compiler to convert. + squeeze_row: bool + Whether to squeeze row + squeeze_col: bool + Whether to squeeze column + + Returns: DataFrame, Series or Scalar + The pandas object with the data from the query compiler view. + """ + # TODO: SNOW-1063355: Modin upgrade - modin.pandas.indexing._iLocIndexer + res_df = self.df.__constructor__(query_compiler=qc_view) + + if not squeeze_row and not squeeze_col: + return res_df + + if squeeze_row and squeeze_col: + res = res_df.to_pandas().squeeze() + # res can be an empty pandas series where the key is out of bounds, here we convert to an empty list to + # avoid return a native pandas object + if isinstance(res, pandas.Series): + res = [] + return res + + if squeeze_row: + if isinstance(res_df, pd.Series): + # call _reduce_dimension directly instead of calling series.squeeze() to avoid to call len(res_df) + res = res_df._reduce_dimension(res_df._query_compiler) + # res can be an empty pandas series where the key is out of bounds, here we convert to an empty list to + # avoid return a native pandas object + if isinstance(res, pandas.Series): + res = [] + return res + return Series(query_compiler=res_df.T._query_compiler) + + # where only squeeze_col is True + len_columns = len(res_df.columns) + if len_columns == 1: + return Series(query_compiler=res_df._query_compiler) + else: + return res_df.copy() + + def __setitem__( + self, + key: INDEXING_KEY_TYPE, + item: INDEXING_ITEM_TYPE, + ) -> None: + """ + Assign `item` value to dataset located by `key`. 
+ + Parameters + ---------- + key : callable or tuple + The global row numbers to assign data to. + item : modin.pandas.DataFrame, modin.pandas.Series, scalar or list like of similar + Value that should be assigned to located dataset. + """ + # TODO: SNOW-1063355: Modin upgrade - modin.pandas.indexing._iLocIndexer + row_loc, col_loc = self._parse_set_row_and_column_locators(key) + + self._validate_numeric_set_key_values(row_loc) + self._validate_numeric_set_key_values(col_loc) + + is_item_series = isinstance(item, pd.Series) + + if not isinstance(item, BasePandasDataset) and is_list_like(item): + if isinstance(self.df, pd.Series) and is_scalar(row_loc): + ErrorMessage.not_implemented( + SET_CELL_WITH_LIST_LIKE_VALUE_ERROR_MESSAGE + ) + + if isinstance(item, pandas.Index): + item = np.array(item.tolist()).transpose() + else: + item = np.array(item) + + if all(sz == 1 for sz in item.shape): + # Treat as a scalar if a single value regardless of dimensions + item = item.flatten()[0] + else: + if item.ndim == 1: + item = pd.Series(item) + is_item_series = True + else: + item = pd.DataFrame(item) + + is_row_key_df = isinstance(row_loc, pd.DataFrame) + is_col_key_df = isinstance(col_loc, pd.DataFrame) + + # The semantics of iloc setitem differ if the row and col key are both + # tuples or dataframes, in particular they set as row, key location coordinates + # rather than entire rows or columns. So for example + # + # row_key=[1,2] and col_key=[3,4] would be locations (1,3), (1,4), (2,3), (2,4) + # but + # row_key=(1,2) and col_key=(3,4) would only set locations (1,3), (2, 4). + + if not is_row_key_df and not is_col_key_df: + set_as_coords = isinstance(row_loc, tuple) or isinstance(col_loc, tuple) + else: + set_as_coords = is_row_key_df and is_col_key_df + + new_qc = self.qc.set_2d_positional( + row_loc._query_compiler + if isinstance(row_loc, BasePandasDataset) + else row_loc, + col_loc._query_compiler + if isinstance(col_loc, BasePandasDataset) + else col_loc, + item._query_compiler if isinstance(item, BasePandasDataset) else item, + set_as_coords, + is_item_series, + ) + + self.df._create_or_update_from_compiler(new_qc, inplace=True) + + def _validate_locator_key(self, key: INDEXING_KEY_TYPE) -> None: + """Used by iloc. See _LocationIndexerBase._validate_locator_key""" + # TODO: SNOW-1063355: Modin upgrade - modin.pandas.indexing._iLocIndexer + super()._validate_locator_key(key) + + if isinstance(key, pd.MultiIndex): + raise TypeError("key of type MultiIndex cannot be used with iloc") + + def _validate_get_locator_key(self, key: INDEXING_KEY_TYPE) -> None: + """Used by iloc. See _LocationIndexerBase._validate_get_locator_key""" + # TODO: SNOW-1063355: Modin upgrade - modin.pandas.indexing._iLocIndexer + super()._validate_get_locator_key(key) + + if is_scalar(key) and not is_integer(key): + raise IndexError( + ILOC_GET_REQUIRES_NUMERIC_INDEXERS_ERROR_MESSAGE.format(key) + ) + + # Tuple e.g. (1, 2) + if isinstance(key, tuple): + # `key` is not allowed to be tuple since nested tuple is not allowed. + # `key` here, which is a 1d indexing key, is generated from 2d indexing key which split into two 1d indexing + # keys if is tuple type. e,g. 2d_key = ((1,2),0), then 1d key for row key=(1,2). This is not allowed. + raise IndexingError(TOO_MANY_INDEXERS_INDEXING_ERROR_MESSAGE) + + if isinstance(key, pd.DataFrame): + raise IndexError(ILOC_GET_DATAFRAME_INDEXER_NOT_ALLOWED_ERROR_MESSAGE) + + def _validate_set_locator_key(self, key: INDEXING_KEY_TYPE) -> None: + """Used by iloc. 
See _LocationIndexerBase._validate_set_locator_key""" + # TODO: SNOW-1063355: Modin upgrade - modin.pandas.indexing._iLocIndexer + super()._validate_set_locator_key(key) + + if is_scalar(key) and not is_integer(key): + raise IndexError( + ILOC_SET_INDICES_MUST_BE_INTEGER_OR_BOOL_ERROR_MESSAGE.format(key) + ) + + def _are_valid_numeric_key_values( + self, + key: Union[slice, int, list[int], list[bool], AnyArrayLike], + is_valid_numeric_dtype: Callable = is_numeric_dtype, + is_valid_numeric_type: Callable = lambda v: isinstance(v, numbers.Number), + ) -> bool: + """ + Validate iloc input key type after relevant type conversion. + + Args: + key: positional key or pd.Series version of positional key + is_valid_numeric_dtype: callable that checks numeric dtype + is_valid_numeric_type: callable that checks numeric type + + Returns: + bool: True if the key is valid else False for invalid key + + Notes: + Snowpark pandas implicitly allows float list like or series values to be compatible with pandas. + For row values, array-like objects, Index objects, and scalars must be converted to a Series object + before calling this method. The original key should be passed in along with the Series version for + printing the error message. + + Raises: + Series: + validate numeric type; + Scalar: + validate numeric type; + slice or range like: + validate start, stop, and step are int type. + list_like: + validate numeric type; + Other invalid types: + raise IndexingError. + """ + # TODO: SNOW-1063355: Modin upgrade - modin.pandas.indexing._iLocIndexer + if isinstance(key, pd.Series): + return is_valid_numeric_dtype(key.dtype) + elif isinstance(key, slice) or is_range_like(key): + validate_positional_slice(key) + elif hasattr(key, "dtype"): + return is_valid_numeric_dtype(key.dtype) + elif is_list_like(key): + return all( + is_scalar(x) and (is_bool(x) or is_valid_numeric_type(x)) for x in key # type: ignore[union-attr] + ) + elif is_scalar(key): + return is_valid_numeric_type(key) + + return True + + def _validate_numeric_get_key_values( + self, + key: Union[slice, int, list[int], list[bool], AnyArrayLike], + original_key: Union[slice, int, list[int], list[bool], AnyArrayLike] = None, + ) -> None: + """See _iLocIndexer._validate_numeric_key_values""" + # TODO: SNOW-1063355: Modin upgrade - modin.pandas.indexing._iLocIndexer + are_valid = self._are_valid_numeric_key_values(key) + if not are_valid: + raise IndexError( + ILOC_GET_REQUIRES_NUMERIC_INDEXERS_ERROR_MESSAGE.format( + key if original_key is None else original_key + ) + ) + + def _validate_numeric_set_key_values( + self, + key: Union[slice, int, list[int], list[bool], AnyArrayLike], + ) -> None: + """See _iLocIndexer._validate_numeric_key_values""" + # TODO: SNOW-1063355: Modin upgrade - modin.pandas.indexing._iLocIndexer + are_valid = self._are_valid_numeric_key_values( + key, + lambda k: is_integer_dtype(k) or is_bool_dtype(k), + lambda k: isinstance(k, numbers.Integral) or is_bool(k), + ) + if not are_valid: + raise IndexError(ILOC_SET_INDICES_MUST_BE_INTEGER_OR_BOOL_ERROR_MESSAGE) diff --git a/src/snowflake/snowpark/modin/pandas/io.py b/src/snowflake/snowpark/modin/pandas/io.py new file mode 100644 index 00000000000..09fa448e97e --- /dev/null +++ b/src/snowflake/snowpark/modin/pandas/io.py @@ -0,0 +1,1163 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +# Licensed to Modin Development Team under one or more contributor license agreements. 
+# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +# Code in this file may constitute partial or total reimplementation, or modification of +# existing code originally distributed by the Modin project, under the Apache License, +# Version 2.0. + +""" +Implement I/O public API as pandas does. + +Almost all docstrings for public and magic methods should be inherited from pandas +for better maintability. +Manually add documentation for methods which are not presented in pandas. +""" + +from __future__ import annotations + +import csv +import inspect +import pathlib +import pickle +from collections import OrderedDict +from typing import ( + IO, + TYPE_CHECKING, + Any, + AnyStr, + Callable, + Hashable, + Iterable, + Iterator, + Literal, + Pattern, + Sequence, +) + +import numpy as np +import pandas +from pandas._libs.lib import NoDefault, no_default +from pandas._typing import ( + CompressionOptions, + ConvertersArg, + CSVEngine, + DtypeArg, + DtypeBackend, + FilePath, + IndexLabel, + IntStrT, + ParseDatesArg, + ReadBuffer, + ReadCsvBuffer, + StorageOptions, + XMLParsers, +) +from pandas.io.parsers import TextFileReader +from pandas.io.parsers.readers import _c_parser_defaults + +# add this line to enable doc tests to run +from snowflake.snowpark.modin import pandas as pd # noqa: F401 +from snowflake.snowpark.modin.config import ExperimentalNumPyAPI +from snowflake.snowpark.modin.plugin._internal.telemetry import ( + snowpark_pandas_telemetry_standalone_function_decorator, +) +from snowflake.snowpark.modin.plugin.utils.error_message import ErrorMessage +from snowflake.snowpark.modin.utils import ( + SupportsPrivateToNumPy, + SupportsPrivateToPandas, + SupportsPublicToNumPy, + _inherit_docstrings, + classproperty, + expanduser_path_arg, +) + +# below logic is to handle circular imports without errors +if TYPE_CHECKING: # pragma: no cover + from .dataframe import DataFrame + from .series import Series + +# TODO: SNOW-1265551: add inherit_docstrings decorators once docstring overrides are available + + +class ModinObjects: + """Lazily import Modin classes and provide an access to them.""" + + _dataframe = None + + @classproperty + def DataFrame(cls): + """Get ``modin.pandas.DataFrame`` class.""" + if cls._dataframe is None: + from .dataframe import DataFrame + + cls._dataframe = DataFrame + return cls._dataframe + + +def _read( + **kwargs, +): # pragma: no cover: our frontend currently overrides read_csv, so this is unused + """ + Read csv file from local disk. + + Parameters + ---------- + **kwargs : dict + Keyword arguments in pandas.read_csv. 
+ + Returns + ------- + modin.pandas.DataFrame + """ + from snowflake.snowpark.modin.core.execution.dispatching.factories.dispatcher import ( + FactoryDispatcher, + ) + + squeeze = kwargs.pop( + "squeeze", False + ) # pragma: no cover: this is a removed argument and should be removed upstream + pd_obj = FactoryDispatcher.read_csv(**kwargs) + # This happens when `read_csv` returns a TextFileReader object for iterating through + if isinstance(pd_obj, TextFileReader): # pragma: no cover + reader = pd_obj.read + pd_obj.read = lambda *args, **kwargs: ModinObjects.DataFrame( + query_compiler=reader(*args, **kwargs) + ) + return pd_obj + result = ModinObjects.DataFrame(query_compiler=pd_obj) + if squeeze: + return result.squeeze(axis=1) + return result + + +# TODO: SNOW-1265551: add inherit_docstrings decorators once docstring overrides are available +@expanduser_path_arg("path_or_buffer") +@snowpark_pandas_telemetry_standalone_function_decorator +def read_xml( + path_or_buffer: FilePath | ReadBuffer[bytes] | ReadBuffer[str], + *, + xpath: str = "./*", + namespaces: dict[str, str] | None = None, + elems_only: bool = False, + attrs_only: bool = False, + names: Sequence[str] | None = None, + dtype: DtypeArg | None = None, + converters: ConvertersArg | None = None, + parse_dates: ParseDatesArg | None = None, + encoding: str | None = "utf-8", + parser: XMLParsers = "lxml", + stylesheet: FilePath | ReadBuffer[bytes] | ReadBuffer[str] | None = None, + iterparse: dict[str, list[str]] | None = None, + compression: CompressionOptions = "infer", + storage_options: StorageOptions = None, + dtype_backend: DtypeBackend | NoDefault = no_default, +) -> DataFrame: + # TODO(https://github.com/modin-project/modin/issues/7104): + # modin needs to remove defaults to pandas at API layer + ErrorMessage.not_implemented() + + +@_inherit_docstrings(pandas.read_csv, apilink="pandas.read_csv") +@expanduser_path_arg("filepath_or_buffer") +@snowpark_pandas_telemetry_standalone_function_decorator +def read_csv( + filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], + *, + sep: str | None | NoDefault = no_default, + delimiter: str | None | NoDefault = None, + # Column and Index Locations and Names + header: int | Sequence[int] | None | Literal["infer"] = "infer", + names: Sequence[Hashable] | None | NoDefault = no_default, + index_col: IndexLabel | Literal[False] | None = None, + usecols=None, + # General Parsing Configuration + dtype: DtypeArg | None = None, + engine: CSVEngine | None = None, + converters=None, + true_values=None, + false_values=None, + skipinitialspace: bool = False, + skiprows=None, + skipfooter: int = 0, + nrows: int | None = None, + # NA and Missing Data Handling + na_values=None, + keep_default_na: bool = True, + na_filter: bool = True, + verbose: bool = no_default, + skip_blank_lines: bool = True, + # Datetime Handling + parse_dates=None, + infer_datetime_format: bool = no_default, + keep_date_col: bool = no_default, + date_parser=no_default, + date_format=None, + dayfirst: bool = False, + cache_dates: bool = True, + # Iteration + iterator: bool = False, + chunksize: int | None = None, + # Quoting, Compression, and File Format + compression: CompressionOptions = "infer", + thousands: str | None = None, + decimal: str = ".", + lineterminator: str | None = None, + quotechar: str = '"', + quoting: int = csv.QUOTE_MINIMAL, + doublequote: bool = True, + escapechar: str | None = None, + comment: str | None = None, + encoding: str | None = None, + encoding_errors: str | None = "strict", + 
dialect: str | csv.Dialect | None = None, + # Error Handling + on_bad_lines="error", + # Internal + delim_whitespace: bool = no_default, + low_memory=_c_parser_defaults["low_memory"], + memory_map: bool = False, + float_precision: Literal["high", "legacy"] | None = None, + storage_options: StorageOptions = None, + dtype_backend: DtypeBackend | NoDefault = no_default, +) -> DataFrame | TextFileReader: # pragma: no cover: this function is overridden by plugin/pd_overrides.py + # ISSUE #2408: parse parameter shared with pandas read_csv and read_table and update with provided args + _pd_read_csv_signature = { + val.name for val in inspect.signature(pandas.read_csv).parameters.values() + } + _, _, _, f_locals = inspect.getargvalues(inspect.currentframe()) + kwargs = {k: v for k, v in f_locals.items() if k in _pd_read_csv_signature} + return _read(**kwargs) + + +@_inherit_docstrings(pandas.read_table, apilink="pandas.read_table") +@snowpark_pandas_telemetry_standalone_function_decorator +@expanduser_path_arg("filepath_or_buffer") +def read_table( + filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], + *, + sep: str | None | NoDefault = no_default, + delimiter: str | None | NoDefault = None, + # Column and Index Locations and Names + header: int | Sequence[int] | None | Literal["infer"] = "infer", + names: Sequence[Hashable] | None | NoDefault = no_default, + index_col: IndexLabel | Literal[False] | None = None, + usecols=None, + # General Parsing Configuration + dtype: DtypeArg | None = None, + engine: CSVEngine | None = None, + converters=None, + true_values=None, + false_values=None, + skipinitialspace: bool = False, + skiprows=None, + skipfooter: int = 0, + nrows: int | None = None, + # NA and Missing Data Handling + na_values=None, + keep_default_na: bool = True, + na_filter: bool = True, + verbose: bool = False, + skip_blank_lines: bool = True, + # Datetime Handling + parse_dates=False, + infer_datetime_format: bool = no_default, + keep_date_col: bool = False, + date_parser=no_default, + date_format: str = None, + dayfirst: bool = False, + cache_dates: bool = True, + # Iteration + iterator: bool = False, + chunksize: int | None = None, + # Quoting, Compression, and File Format + compression: CompressionOptions = "infer", + thousands: str | None = None, + decimal: str = ".", + lineterminator: str | None = None, + quotechar: str = '"', + quoting: int = csv.QUOTE_MINIMAL, + doublequote: bool = True, + escapechar: str | None = None, + comment: str | None = None, + encoding: str | None = None, + encoding_errors: str | None = "strict", + dialect: str | csv.Dialect | None = None, + # Error Handling + on_bad_lines="error", + # Internal + delim_whitespace=False, + low_memory=_c_parser_defaults["low_memory"], + memory_map: bool = False, + float_precision: str | None = None, + storage_options: StorageOptions = None, + dtype_backend: DtypeBackend | NoDefault = no_default, +) -> DataFrame | TextFileReader: # pragma: no cover + # ISSUE #2408: parse parameter shared with pandas read_csv and read_table and update with provided args + _pd_read_table_signature = { + val.name for val in inspect.signature(pandas.read_table).parameters.values() + } + _, _, _, f_locals = inspect.getargvalues(inspect.currentframe()) + if f_locals.get("sep", sep) is False or f_locals.get("sep", sep) is no_default: + f_locals["sep"] = "\t" + kwargs = {k: v for k, v in f_locals.items() if k in _pd_read_table_signature} + return _read(**kwargs) + + +# TODO: SNOW-1265551: add inherit_docstrings decorators once 
docstring overrides are available +@snowpark_pandas_telemetry_standalone_function_decorator +@expanduser_path_arg("path") +def read_parquet( + path, + engine: str = "auto", + columns: list[str] | None = None, + storage_options: StorageOptions = None, + use_nullable_dtypes: bool = no_default, + dtype_backend=no_default, + filesystem=None, + filters=None, + **kwargs, +) -> DataFrame: # pragma: no cover: this function is overridden by plugin/pd_overrides.py + from snowflake.snowpark.modin.core.execution.dispatching.factories.dispatcher import ( + FactoryDispatcher, + ) + + if engine == "fastparquet" and dtype_backend is not no_default: + raise ValueError( + "The 'dtype_backend' argument is not supported for the fastparquet engine" + ) + + return ModinObjects.DataFrame( + query_compiler=FactoryDispatcher.read_parquet( + path=path, + engine=engine, + columns=columns, + storage_options=storage_options, + use_nullable_dtypes=use_nullable_dtypes, + dtype_backend=dtype_backend, + filesystem=filesystem, + filters=filters, + **kwargs, + ) + ) + + +# TODO: SNOW-1265551: add inherit_docstrings decorators once docstring overrides are available +@snowpark_pandas_telemetry_standalone_function_decorator +@expanduser_path_arg("path_or_buf") +def read_json( + path_or_buf, + *, + orient: str | None = None, + typ: Literal["frame", "series"] = "frame", + dtype: DtypeArg | None = None, + convert_axes=None, + convert_dates: bool | list[str] = True, + keep_default_dates: bool = True, + precise_float: bool = False, + date_unit: str | None = None, + encoding: str | None = None, + encoding_errors: str | None = "strict", + lines: bool = False, + chunksize: int | None = None, + compression: CompressionOptions = "infer", + nrows: int | None = None, + storage_options: StorageOptions = None, + dtype_backend: DtypeBackend | NoDefault = no_default, + engine="ujson", +) -> DataFrame | Series | pandas.io.json._json.JsonReader: # pragma: no cover: this function is overridden by plugin/pd_overrides.py + _, _, _, kwargs = inspect.getargvalues(inspect.currentframe()) + + from snowflake.snowpark.modin.core.execution.dispatching.factories.dispatcher import ( + FactoryDispatcher, + ) + + return ModinObjects.DataFrame(query_compiler=FactoryDispatcher.read_json(**kwargs)) + + +@_inherit_docstrings(pandas.read_gbq, apilink="pandas.read_gbq") +@snowpark_pandas_telemetry_standalone_function_decorator +def read_gbq( + query: str, + project_id: str | None = None, + index_col: str | None = None, + col_order: list[str] | None = None, + reauth: bool = False, + auth_local_webserver: bool = True, + dialect: str | None = None, + location: str | None = None, + configuration: dict[str, Any] | None = None, + credentials=None, + use_bqstorage_api: bool | None = None, + max_results: int | None = None, + progress_bar_type: str | None = None, +) -> DataFrame: + _, _, _, kwargs = inspect.getargvalues(inspect.currentframe()) + kwargs.update(kwargs.pop("kwargs", {})) + + from snowflake.snowpark.modin.core.execution.dispatching.factories.dispatcher import ( + FactoryDispatcher, + ) + + return ModinObjects.DataFrame(query_compiler=FactoryDispatcher.read_gbq(**kwargs)) + + +@_inherit_docstrings(pandas.read_html, apilink="pandas.read_html") +@snowpark_pandas_telemetry_standalone_function_decorator +@expanduser_path_arg("io") +def read_html( + io, + *, + match: str | Pattern = ".+", + flavor: str | None = None, + header: int | Sequence[int] | None = None, + index_col: int | Sequence[int] | None = None, + skiprows: int | Sequence[int] | slice | None = None, + 
attrs: dict[str, str] | None = None, + parse_dates: bool = False, + thousands: str | None = ",", + encoding: str | None = None, + decimal: str = ".", + converters: dict | None = None, + na_values: Iterable[object] | None = None, + keep_default_na: bool = True, + displayed_only: bool = True, + extract_links: Literal[None, "header", "footer", "body", "all"] = None, + dtype_backend: DtypeBackend | NoDefault = no_default, + storage_options: StorageOptions = None, +) -> list[DataFrame]: # pragma: no cover # noqa: PR01, RT01, D200 + """ + Read HTML tables into a ``DataFrame`` object. + """ + _, _, _, kwargs = inspect.getargvalues(inspect.currentframe()) + + from snowflake.snowpark.modin.core.execution.dispatching.factories.dispatcher import ( + FactoryDispatcher, + ) + + qcs = FactoryDispatcher.read_html(**kwargs) + return [ModinObjects.DataFrame(query_compiler=qc) for qc in qcs] + + +@_inherit_docstrings(pandas.read_clipboard, apilink="pandas.read_clipboard") +@snowpark_pandas_telemetry_standalone_function_decorator +def read_clipboard( + sep=r"\s+", + dtype_backend: DtypeBackend | NoDefault = no_default, + **kwargs, +): # pragma: no cover # noqa: PR01, RT01, D200 + """ + Read text from clipboard and pass to read_csv. + """ + _, _, _, kwargs = inspect.getargvalues(inspect.currentframe()) + kwargs.update(kwargs.pop("kwargs", {})) + + from snowflake.snowpark.modin.core.execution.dispatching.factories.dispatcher import ( + FactoryDispatcher, + ) + + return ModinObjects.DataFrame( + query_compiler=FactoryDispatcher.read_clipboard(**kwargs) + ) + + +@_inherit_docstrings(pandas.read_excel, apilink="pandas.read_excel") +@snowpark_pandas_telemetry_standalone_function_decorator +@expanduser_path_arg("io") +def read_excel( + io, + sheet_name: str | int | list[IntStrT] | None = 0, + *, + header: int | Sequence[int] | None = 0, + names: list[str] | None = None, + index_col: int | Sequence[int] | None = None, + usecols: int + | str + | Sequence[int] + | Sequence[str] + | Callable[[str], bool] + | None = None, + dtype: DtypeArg | None = None, + engine: Literal[("xlrd", "openpyxl", "odf", "pyxlsb")] | None = None, + converters: dict[str, Callable] | dict[int, Callable] | None = None, + true_values: Iterable[Hashable] | None = None, + false_values: Iterable[Hashable] | None = None, + skiprows: Sequence[int] | int | Callable[[int], object] | None = None, + nrows: int | None = None, + na_values=None, + keep_default_na: bool = True, + na_filter: bool = True, + verbose: bool = False, + parse_dates: list | dict | bool = False, + date_parser: Callable | NoDefault = no_default, + date_format=None, + thousands: str | None = None, + decimal: str = ".", + comment: str | None = None, + skipfooter: int = 0, + storage_options: StorageOptions = None, + dtype_backend: DtypeBackend | NoDefault = no_default, + engine_kwargs: dict | None = None, +) -> DataFrame | dict[IntStrT, DataFrame]: # pragma: no cover + _, _, _, kwargs = inspect.getargvalues(inspect.currentframe()) + + from snowflake.snowpark.modin.core.execution.dispatching.factories.dispatcher import ( + FactoryDispatcher, + ) + + intermediate = FactoryDispatcher.read_excel(**kwargs) + if isinstance(intermediate, (OrderedDict, dict)): + parsed = type(intermediate)() + for key in intermediate.keys(): + parsed[key] = ModinObjects.DataFrame(query_compiler=intermediate.get(key)) + return parsed + else: + return ModinObjects.DataFrame(query_compiler=intermediate) + + +@_inherit_docstrings(pandas.read_hdf, apilink="pandas.read_hdf") 
+@snowpark_pandas_telemetry_standalone_function_decorator +@expanduser_path_arg("path_or_buf") +def read_hdf( + path_or_buf, + key=None, + mode: str = "r", + errors: str = "strict", + where=None, + start: int | None = None, + stop: int | None = None, + columns=None, + iterator=False, + chunksize: int | None = None, + **kwargs, +): # noqa: PR01, RT01, D200 + """ + Read data from the store into DataFrame. + """ + _, _, _, kwargs = inspect.getargvalues(inspect.currentframe()) + kwargs.update(kwargs.pop("kwargs", {})) + + from snowflake.snowpark.modin.core.execution.dispatching.factories.dispatcher import ( + FactoryDispatcher, + ) + + return ModinObjects.DataFrame(query_compiler=FactoryDispatcher.read_hdf(**kwargs)) + + +@_inherit_docstrings(pandas.read_feather, apilink="pandas.read_feather") +@snowpark_pandas_telemetry_standalone_function_decorator +@expanduser_path_arg("path") +def read_feather( + path, + columns: Sequence[Hashable] | None = None, + use_threads: bool = True, + storage_options: StorageOptions = None, + dtype_backend: DtypeBackend | NoDefault = no_default, +): + _, _, _, kwargs = inspect.getargvalues(inspect.currentframe()) + + from snowflake.snowpark.modin.core.execution.dispatching.factories.dispatcher import ( + FactoryDispatcher, + ) + + return ModinObjects.DataFrame( + query_compiler=FactoryDispatcher.read_feather(**kwargs) + ) + + +@_inherit_docstrings(pandas.read_stata) +@snowpark_pandas_telemetry_standalone_function_decorator +@expanduser_path_arg("filepath_or_buffer") +def read_stata( + filepath_or_buffer, + *, + convert_dates: bool = True, + convert_categoricals: bool = True, + index_col: str | None = None, + convert_missing: bool = False, + preserve_dtypes: bool = True, + columns: Sequence[str] | None = None, + order_categoricals: bool = True, + chunksize: int | None = None, + iterator: bool = False, + compression: CompressionOptions = "infer", + storage_options: StorageOptions = None, +) -> DataFrame | pandas.io.stata.StataReader: + _, _, _, kwargs = inspect.getargvalues(inspect.currentframe()) + + from snowflake.snowpark.modin.core.execution.dispatching.factories.dispatcher import ( + FactoryDispatcher, + ) + + return ModinObjects.DataFrame(query_compiler=FactoryDispatcher.read_stata(**kwargs)) + + +@_inherit_docstrings(pandas.read_sas, apilink="pandas.read_sas") +@snowpark_pandas_telemetry_standalone_function_decorator +@expanduser_path_arg("filepath_or_buffer") +def read_sas( + filepath_or_buffer, + *, + format: str | None = None, + index: Hashable | None = None, + encoding: str | None = None, + chunksize: int | None = None, + iterator: bool = False, + compression: CompressionOptions = "infer", +) -> DataFrame | pandas.io.sas.sasreader.ReaderBase: # noqa: PR01, RT01, D200 + """ + Read SAS files stored as either XPORT or SAS7BDAT format files. 
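+
+ Example (illustrative sketch; the file path is hypothetical):
+ df = pd.read_sas("data/example.sas7bdat", format="sas7bdat")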
+ """ + from snowflake.snowpark.modin.core.execution.dispatching.factories.dispatcher import ( + FactoryDispatcher, + ) + + return ModinObjects.DataFrame( + query_compiler=FactoryDispatcher.read_sas( + filepath_or_buffer=filepath_or_buffer, + format=format, + index=index, + encoding=encoding, + chunksize=chunksize, + iterator=iterator, + compression=compression, + ) + ) + + +@_inherit_docstrings(pandas.read_pickle, apilink="pandas.read_pickle") +@snowpark_pandas_telemetry_standalone_function_decorator +@expanduser_path_arg("filepath_or_buffer") +def read_pickle( + filepath_or_buffer, + compression: CompressionOptions = "infer", + storage_options: StorageOptions = None, +): + _, _, _, kwargs = inspect.getargvalues(inspect.currentframe()) + + from snowflake.snowpark.modin.core.execution.dispatching.factories.dispatcher import ( + FactoryDispatcher, + ) + + return ModinObjects.DataFrame( + query_compiler=FactoryDispatcher.read_pickle(**kwargs) + ) + + +@_inherit_docstrings(pandas.read_sql, apilink="pandas.read_sql") +@snowpark_pandas_telemetry_standalone_function_decorator +def read_sql( + sql, + con, + index_col=None, + coerce_float=True, + params=None, + parse_dates=None, + columns=None, + chunksize=None, + dtype_backend: DtypeBackend | NoDefault = no_default, + dtype=None, +): # pragma: no cover # noqa: PR01, RT01, D200 + """ + Read SQL query or database table into a DataFrame. + """ + _, _, _, kwargs = inspect.getargvalues(inspect.currentframe()) + + from snowflake.snowpark.modin.core.execution.dispatching.factories.dispatcher import ( + FactoryDispatcher, + ) + + if kwargs.get("chunksize") is not None: + ErrorMessage.default_to_pandas("Parameters provided [chunksize]") + df_gen = pandas.read_sql(**kwargs) + return ( + ModinObjects.DataFrame(query_compiler=FactoryDispatcher.from_pandas(df)) + for df in df_gen + ) + return ModinObjects.DataFrame(query_compiler=FactoryDispatcher.read_sql(**kwargs)) + + +@_inherit_docstrings(pandas.read_fwf, apilink="pandas.read_fwf") +@snowpark_pandas_telemetry_standalone_function_decorator +@expanduser_path_arg("filepath_or_buffer") +def read_fwf( + filepath_or_buffer: str | pathlib.Path | IO[AnyStr], + *, + colspecs="infer", + widths=None, + infer_nrows=100, + dtype_backend: DtypeBackend | NoDefault = no_default, + **kwds, +): # pragma: no cover # noqa: PR01, RT01, D200 + """ + Read a table of fixed-width formatted lines into DataFrame. 
+ """ + from pandas.io.parsers.base_parser import parser_defaults + + from snowflake.snowpark.modin.core.execution.dispatching.factories.dispatcher import ( + FactoryDispatcher, + ) + + _, _, _, kwargs = inspect.getargvalues(inspect.currentframe()) + kwargs.update(kwargs.pop("kwds", {})) + target_kwargs = parser_defaults.copy() + target_kwargs.update(kwargs) + pd_obj = FactoryDispatcher.read_fwf(**target_kwargs) + # When `read_fwf` returns a TextFileReader object for iterating through + if isinstance(pd_obj, TextFileReader): + reader = pd_obj.read + pd_obj.read = lambda *args, **kwargs: ModinObjects.DataFrame( + query_compiler=reader(*args, **kwargs) + ) + return pd_obj + return ModinObjects.DataFrame(query_compiler=pd_obj) + + +@_inherit_docstrings(pandas.read_sql_table, apilink="pandas.read_sql_table") +@snowpark_pandas_telemetry_standalone_function_decorator +def read_sql_table( + table_name, + con, + schema=None, + index_col=None, + coerce_float=True, + parse_dates=None, + columns=None, + chunksize=None, + dtype_backend: DtypeBackend | NoDefault = no_default, +): # noqa: PR01, RT01, D200 + """ + Read SQL database table into a DataFrame. + """ + _, _, _, kwargs = inspect.getargvalues(inspect.currentframe()) + + from snowflake.snowpark.modin.core.execution.dispatching.factories.dispatcher import ( + FactoryDispatcher, + ) + + return ModinObjects.DataFrame( + query_compiler=FactoryDispatcher.read_sql_table(**kwargs) + ) + + +@_inherit_docstrings(pandas.read_sql_query, apilink="pandas.read_sql_query") +@snowpark_pandas_telemetry_standalone_function_decorator +def read_sql_query( + sql, + con, + index_col: str | list[str] | None = None, + coerce_float: bool = True, + params: list[str] | dict[str, str] | None = None, + parse_dates: list[str] | dict[str, str] | None = None, + chunksize: int | None = None, + dtype: DtypeArg | None = None, + dtype_backend: DtypeBackend | NoDefault = no_default, +) -> DataFrame | Iterator[DataFrame]: + _, _, _, kwargs = inspect.getargvalues(inspect.currentframe()) + + from snowflake.snowpark.modin.core.execution.dispatching.factories.dispatcher import ( + FactoryDispatcher, + ) + + return ModinObjects.DataFrame( + query_compiler=FactoryDispatcher.read_sql_query(**kwargs) + ) + + +@_inherit_docstrings(pandas.to_pickle) +@snowpark_pandas_telemetry_standalone_function_decorator +@expanduser_path_arg("filepath_or_buffer") +def to_pickle( + obj: Any, + filepath_or_buffer, + compression: CompressionOptions = "infer", + protocol: int = pickle.HIGHEST_PROTOCOL, + storage_options: StorageOptions = None, +) -> None: + from snowflake.snowpark.modin.core.execution.dispatching.factories.dispatcher import ( + FactoryDispatcher, + ) + + if isinstance(obj, ModinObjects.DataFrame): + obj = obj._query_compiler + return FactoryDispatcher.to_pickle( + obj, + filepath_or_buffer=filepath_or_buffer, + compression=compression, + protocol=protocol, + storage_options=storage_options, + ) + + +@_inherit_docstrings(pandas.read_spss, apilink="pandas.read_spss") +@snowpark_pandas_telemetry_standalone_function_decorator +@expanduser_path_arg("path") +def read_spss( + path: str | pathlib.Path, + usecols: Sequence[str] | None = None, + convert_categoricals: bool = True, + dtype_backend: DtypeBackend | NoDefault = no_default, +): # noqa: PR01, RT01, D200 + """ + Load an SPSS file from the file path, returning a DataFrame. 
+ """ + from snowflake.snowpark.modin.core.execution.dispatching.factories.dispatcher import ( + FactoryDispatcher, + ) + + return ModinObjects.DataFrame( + query_compiler=FactoryDispatcher.read_spss( + path=path, + usecols=usecols, + convert_categoricals=convert_categoricals, + dtype_backend=dtype_backend, + ) + ) + + +@_inherit_docstrings(pandas.json_normalize, apilink="pandas.json_normalize") +@snowpark_pandas_telemetry_standalone_function_decorator +def json_normalize( + data: dict | list[dict], + record_path: str | list | None = None, + meta: str | list[str | list[str]] | None = None, + meta_prefix: str | None = None, + record_prefix: str | None = None, + errors: str | None = "raise", + sep: str = ".", + max_level: int | None = None, +) -> DataFrame: # noqa: PR01, RT01, D200 + """ + Normalize semi-structured JSON data into a flat table. + """ + # TODO(https://github.com/modin-project/modin/issues/7104): + # modin needs to remove defaults to pandas at API layer + ErrorMessage.not_implemented() + + +@_inherit_docstrings(pandas.read_orc, apilink="pandas.read_orc") +@snowpark_pandas_telemetry_standalone_function_decorator +@expanduser_path_arg("path") +def read_orc( + path, + columns: list[str] | None = None, + dtype_backend: DtypeBackend | NoDefault = no_default, + filesystem=None, + **kwargs, +) -> DataFrame: # noqa: PR01, RT01, D200 + """ + Load an ORC object from the file path, returning a DataFrame. + """ + # TODO(https://github.com/modin-project/modin/issues/7104): + # modin needs to remove defaults to pandas at API layer + ErrorMessage.not_implemented() + + +@_inherit_docstrings(pandas.HDFStore) +@snowpark_pandas_telemetry_standalone_function_decorator +class HDFStore(pandas.HDFStore): # pragma: no cover # noqa: PR01, D200 + """ + Dict-like IO interface for storing pandas objects in PyTables. + """ + + _return_modin_dataframe = True + + def __getattribute__(self, item): + default_behaviors = ["__init__", "__class__"] + method = super().__getattribute__(item) + if item not in default_behaviors: + if callable(method): + + def return_handler(*args, **kwargs): + """ + Replace the default behavior of methods with inplace kwarg. + + Returns + ------- + A Modin DataFrame in place of a pandas DataFrame, or the same + return type as pandas.HDFStore. + + Notes + ----- + This function will replace all of the arguments passed to + methods of HDFStore with the pandas equivalent. It will convert + Modin DataFrame to pandas DataFrame, etc. Currently, pytables + does not accept Modin DataFrame objects, so we must convert to + pandas. + """ + # We don't want to constantly be giving this error message for + # internal methods. + if item[0] != "_": + ErrorMessage.default_to_pandas(f"`{item}`") + args = [ + to_pandas(arg) + if isinstance(arg, ModinObjects.DataFrame) + else arg + for arg in args + ] + kwargs = { + k: to_pandas(v) if isinstance(v, ModinObjects.DataFrame) else v + for k, v in kwargs.items() + } + obj = super(HDFStore, self).__getattribute__(item)(*args, **kwargs) + if self._return_modin_dataframe and isinstance( + obj, pandas.DataFrame + ): + return ModinObjects.DataFrame(obj) + return obj + + # We replace the method with `return_handler` for inplace operations + method = return_handler + return method + + +@_inherit_docstrings(pandas.ExcelFile) +@snowpark_pandas_telemetry_standalone_function_decorator +class ExcelFile(pandas.ExcelFile): # pragma: no cover # noqa: PR01, D200 + """ + Class for parsing tabular excel sheets into DataFrame objects. 
+ """ + + _behave_like_pandas = False + + def _set_pandas_mode(self): # noqa + # disable Modin behavior to be able to pass object to `pandas.read_excel` + # otherwise, Modin objects may be passed to the pandas context, resulting + # in undefined behavior + self._behave_like_pandas = True + + def __getattribute__(self, item): + if item in ["_set_pandas_mode", "_behave_like_pandas"]: + return object.__getattribute__(self, item) + + default_behaviors = ["__init__", "__class__"] + method = super().__getattribute__(item) + if not self._behave_like_pandas and item not in default_behaviors: + if callable(method): + + def return_handler(*args, **kwargs): + """ + Replace the default behavior of methods with inplace kwarg. + + Returns + ------- + A Modin DataFrame in place of a pandas DataFrame, or the same + return type as pandas.ExcelFile. + + Notes + ----- + This function will replace all of the arguments passed to + methods of ExcelFile with the pandas equivalent. It will convert + Modin DataFrame to pandas DataFrame, etc. + """ + # We don't want to constantly be giving this error message for + # internal methods. + if item[0] != "_": + ErrorMessage.default_to_pandas(f"`{item}`") + args = [ + to_pandas(arg) + if isinstance(arg, ModinObjects.DataFrame) + else arg + for arg in args + ] + kwargs = { + k: to_pandas(v) if isinstance(v, ModinObjects.DataFrame) else v + for k, v in kwargs.items() + } + obj = super(ExcelFile, self).__getattribute__(item)(*args, **kwargs) + if isinstance(obj, pandas.DataFrame): + return ModinObjects.DataFrame(obj) + return obj + + # We replace the method with `return_handler` for inplace operations + method = return_handler + return method + + +@snowpark_pandas_telemetry_standalone_function_decorator +def from_non_pandas(df, index, columns, dtype): # pragma: no cover + """ + Convert a non-pandas DataFrame into Modin DataFrame. + + Parameters + ---------- + df : object + Non-pandas DataFrame. + index : object + Index for non-pandas DataFrame. + columns : object + Columns for non-pandas DataFrame. + dtype : type + Data type to force. + + Returns + ------- + modin.pandas.DataFrame + Converted DataFrame. + """ + from snowflake.snowpark.modin.core.execution.dispatching.factories.dispatcher import ( + FactoryDispatcher, + ) + + new_qc = FactoryDispatcher.from_non_pandas(df, index, columns, dtype) + if new_qc is not None: + return ModinObjects.DataFrame(query_compiler=new_qc) + return new_qc + + +@snowpark_pandas_telemetry_standalone_function_decorator +def from_pandas(df): # pragma: no cover + """ + Convert a pandas DataFrame to a Modin DataFrame. + + Parameters + ---------- + df : pandas.DataFrame + The pandas DataFrame to convert. + + Returns + ------- + modin.pandas.DataFrame + A new Modin DataFrame object. + """ + from snowflake.snowpark.modin.core.execution.dispatching.factories.dispatcher import ( + FactoryDispatcher, + ) + + return ModinObjects.DataFrame(query_compiler=FactoryDispatcher.from_pandas(df)) + + +@snowpark_pandas_telemetry_standalone_function_decorator +def from_arrow(at): # pragma: no cover + """ + Convert an Arrow Table to a Modin DataFrame. + + Parameters + ---------- + at : Arrow Table + The Arrow Table to convert from. + + Returns + ------- + DataFrame + A new Modin DataFrame object. 
+ """ + from snowflake.snowpark.modin.core.execution.dispatching.factories.dispatcher import ( + FactoryDispatcher, + ) + + return ModinObjects.DataFrame(query_compiler=FactoryDispatcher.from_arrow(at)) + + +@snowpark_pandas_telemetry_standalone_function_decorator +def from_dataframe(df): # pragma: no cover + """ + Convert a DataFrame implementing the dataframe exchange protocol to a Modin DataFrame. + + See more about the protocol in https://data-apis.org/dataframe-protocol/latest/index.html. + + Parameters + ---------- + df : DataFrame + The DataFrame object supporting the dataframe exchange protocol. + + Returns + ------- + DataFrame + A new Modin DataFrame object. + """ + from snowflake.snowpark.modin.core.execution.dispatching.factories.dispatcher import ( + FactoryDispatcher, + ) + + return ModinObjects.DataFrame(query_compiler=FactoryDispatcher.from_dataframe(df)) + + +@snowpark_pandas_telemetry_standalone_function_decorator +def to_pandas(modin_obj: SupportsPrivateToPandas) -> Any: # pragma: no cover + """ + Convert a Modin DataFrame/Series to a pandas DataFrame/Series. + + Parameters + ---------- + modin_obj : modin.DataFrame, modin.Series + The Modin DataFrame/Series to convert. + + Returns + ------- + pandas.DataFrame or pandas.Series + Converted object with type depending on input. + """ + return modin_obj._to_pandas() + + +@snowpark_pandas_telemetry_standalone_function_decorator +def to_numpy( + modin_obj: SupportsPrivateToNumPy | SupportsPublicToNumPy, +) -> np.ndarray: # pragma: no cover + """ + Convert a Modin object to a NumPy array. + + Parameters + ---------- + modin_obj : modin.DataFrame, modin."Series", modin.numpy.array + The Modin distributed object to convert. + + Returns + ------- + numpy.array + Converted object with type depending on input. + """ + if isinstance(modin_obj, SupportsPrivateToNumPy): + return modin_obj._to_numpy() + array = modin_obj.to_numpy() + if ExperimentalNumPyAPI.get(): + array = array._to_numpy() + return array + + +__all__ = [ + "ExcelFile", + "HDFStore", + "json_normalize", + "read_clipboard", + "read_csv", + "read_excel", + "read_feather", + "read_fwf", + "read_gbq", + "read_hdf", + "read_html", + "read_json", + "read_orc", + "read_parquet", + "read_pickle", + "read_sas", + "read_spss", + "read_sql", + "read_sql_query", + "read_sql_table", + "read_stata", + "read_table", + "read_xml", + "from_non_pandas", + "from_pandas", + "from_arrow", + "from_dataframe", + "to_pickle", + "to_pandas", + "to_numpy", +] diff --git a/src/snowflake/snowpark/modin/pandas/iterator.py b/src/snowflake/snowpark/modin/pandas/iterator.py new file mode 100644 index 00000000000..717a743861c --- /dev/null +++ b/src/snowflake/snowpark/modin/pandas/iterator.py @@ -0,0 +1,82 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. 
See the License for the specific language +# governing permissions and limitations under the License. + +# Code in this file may constitute partial or total reimplementation, or modification of +# existing code originally distributed by the Modin project, under the Apache License, +# Version 2.0. + +"""Place to define the Modin iterator.""" + +from collections.abc import Iterator +from typing import Callable + +import snowflake.snowpark.modin.pandas.dataframe as DataFrame + + +class PartitionIterator(Iterator): + """ + Iterator on partitioned data. + + Parameters + ---------- + df : DataFrame + The dataframe to iterate over. + axis : {0, 1} + Axis to iterate over. + func : callable + The function to get inner iterables from each partition. + """ + + def __init__(self, df: DataFrame, axis: int, func: Callable) -> None: + self.df = df + self.axis = axis + self.index_iter = ( + zip( + iter(slice(None) for _ in range(len(self.df.columns))), + range(len(self.df.columns)), + ) + if axis + else zip( + range(len(self.df.index)), + iter(slice(None) for _ in range(len(self.df.index))), + ) + ) + self.func = func + + def __iter__(self) -> None: + """ + Implement iterator interface. + + Returns + ------- + PartitionIterator + Iterator object. + """ + return self + + def __next__(self): + """ + Implement iterator interface. + + Returns + ------- + PartitionIterator + Incremented iterator object. + """ + key = next(self.index_iter) + df = self.df.iloc[key] + return self.func(df) diff --git a/src/snowflake/snowpark/modin/pandas/plotting.py b/src/snowflake/snowpark/modin/pandas/plotting.py new file mode 100644 index 00000000000..e7099e94011 --- /dev/null +++ b/src/snowflake/snowpark/modin/pandas/plotting.py @@ -0,0 +1,84 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +# Code in this file may constitute partial or total reimplementation, or modification of +# existing code originally distributed by the Modin project, under the Apache License, +# Version 2.0. + +"""Implement pandas plotting API.""" + +from pandas import plotting as pdplot + +# Snowpark pandas API version +from snowflake.snowpark.modin.pandas.dataframe import DataFrame +from snowflake.snowpark.modin.utils import instancer, to_pandas + + +@instancer +class Plotting: + """Wrapper of pandas plotting module.""" + + def __dir__(self): + """ + Enable tab completion of plotting library. + + Returns + ------- + list + List of attributes in `self`. + """ + return dir(pdplot) + + def __getattribute__(self, item): + """ + Convert any Modin DataFrames in parameters to pandas so that they can be plotted normally. + + Parameters + ---------- + item : str + Attribute to look for. 
+ + Returns + ------- + object + If attribute is found in pandas.plotting, and it is a callable, a wrapper function is + returned which converts its arguments to pandas and calls a function pandas.plotting.`item` + on these arguments. + If attribute is found in pandas.plotting but it is not a callable, returns it. + Otherwise function tries to look for an attribute in `self`. + """ + if hasattr(pdplot, item): + func = getattr(pdplot, item) + if callable(func): + + def wrap_func(*args, **kwargs): + """Convert Modin DataFrames to pandas then call the function.""" + args = tuple( + arg if not isinstance(arg, DataFrame) else to_pandas(arg) + for arg in args + ) + kwargs = { + kwd: val if not isinstance(val, DataFrame) else to_pandas(val) + for kwd, val in kwargs.items() + } + return func(*args, **kwargs) + + return wrap_func + else: + return func + else: + return object.__getattribute__(self, item) diff --git a/src/snowflake/snowpark/modin/pandas/resample.py b/src/snowflake/snowpark/modin/pandas/resample.py new file mode 100644 index 00000000000..e33d3dc565f --- /dev/null +++ b/src/snowflake/snowpark/modin/pandas/resample.py @@ -0,0 +1,510 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +# Code in this file may constitute partial or total reimplementation, or modification of +# existing code originally distributed by the Modin project, under the Apache License, +# Version 2.0. 
+ +# Code in this file may constitute partial or total reimplementation, or modification of +# existing code originally distributed by the pandas project, under the BSD 3-Clause License + +"""Implement Resampler public API.""" +from typing import Any, Callable, Literal, Optional, Union + +import numpy as np +import pandas +import pandas.core.resample +from pandas._libs import lib +from pandas._libs.lib import no_default +from pandas._typing import AggFuncType, AnyArrayLike, Axis, T + +from snowflake.snowpark.modin import ( # noqa: F401 # add this line to enable doc tests to run + pandas as pd, +) +from snowflake.snowpark.modin.plugin._internal.telemetry import TelemetryMeta +from snowflake.snowpark.modin.plugin._typing import InterpolateOptions +from snowflake.snowpark.modin.plugin.utils.error_message import ErrorMessage +from snowflake.snowpark.modin.plugin.utils.warning_message import WarningMessage +from snowflake.snowpark.modin.utils import ( + _inherit_docstrings, + doc_replace_dataframe_with_link, +) + + +@_inherit_docstrings( + pandas.core.resample.Resampler, modify_doc=doc_replace_dataframe_with_link +) +class Resampler(metaclass=TelemetryMeta): + def __init__( + self, + dataframe, + rule, + axis=0, + closed=None, + label=None, + convention="start", + kind=None, + on=None, + level=None, + origin="start_day", + offset=None, + group_keys=no_default, + ) -> None: + # TODO: SNOW-1063368: Modin upgrade - modin.pandas.resample.Resample + self._dataframe = dataframe + self._query_compiler = dataframe._query_compiler + self.axis = self._dataframe._get_axis_number(axis) + self.resample_kwargs = { + "rule": rule, + "axis": axis, + "closed": closed, + "label": label, + "convention": convention, + "kind": kind, + "on": on, + "level": level, + "origin": origin, + "offset": offset, + "group_keys": group_keys, + } + self.__groups = self._get_groups() + + def _method_not_implemented(self, method: str): # pragma: no cover + # TODO: SNOW-1063368: Modin upgrade - modin.pandas.resample.Resample + raise ErrorMessage.not_implemented( + f"Method {method} is not implemented for Resampler!" + ) + + def _validate_numeric_only_for_aggregate_methods(self, numeric_only): + """ + When the caller object is Series (ndim == 1), it is not valid to call aggregation + method with numeric_only = True. + + Raises: + NotImplementedError if the above condition is encountered. + """ + # TODO: SNOW-1063368: Modin upgrade - modin.pandas.resample.Resample + if self._dataframe.ndim == 1: + if numeric_only and numeric_only is not lib.no_default: + raise ErrorMessage.not_implemented( + "Series Resampler does not implement numeric_only." + ) + + def _get_groups(self): + """ + Compute the resampled groups. + + Returns + ------- + PandasGroupby + Groups as specified by resampling arguments. + """ + # TODO: SNOW-1063368: Modin upgrade - modin.pandas.resample.Resample + df = self._dataframe if self.axis == 0 else self._dataframe.T + groups = df.groupby( + pandas.Grouper( + key=self.resample_kwargs["on"], + freq=self.resample_kwargs["rule"], + closed=self.resample_kwargs["closed"], + label=self.resample_kwargs["label"], + convention=self.resample_kwargs["convention"], + level=self.resample_kwargs["level"], + origin=self.resample_kwargs["origin"], + offset=self.resample_kwargs["offset"], + ), + group_keys=self.resample_kwargs["group_keys"], + ) + return groups + + def __getitem__(self, key): # pragma: no cover + """ + Get ``Resampler`` based on `key` columns of original dataframe. 
+ + Parameters + ---------- + key : str or list + String or list of selections. + + Returns + ------- + modin.pandas.BasePandasDataset + New ``Resampler`` based on `key` columns subset + of the original dataframe. + """ + + # TODO: SNOW-1063368: Modin upgrade - modin.pandas.resample.Resample + + def _get_new_resampler(key): + subset = self._dataframe[key] + resampler = type(self)(subset, **self.resample_kwargs) + return resampler + + from snowflake.snowpark.modin.pandas.series import Series + + if isinstance(key, (list, tuple, Series, pandas.Index, np.ndarray)): + if len(self._dataframe.columns.intersection(key)) != len(set(key)): + missed_keys = list(set(key).difference(self._dataframe.columns)) + raise KeyError(f"Columns not found: {str(sorted(missed_keys))[1:-1]}") + return _get_new_resampler(list(key)) + + if key not in self._dataframe: + raise KeyError(f"Column not found: {key}") + + return _get_new_resampler(key) + + @property + def groups(self): # pragma: no cover + # TODO: SNOW-1063368: Modin upgrade - modin.pandas.resample.Resample + self._method_not_implemented("groups") + # This property is currently not supported, and NotImplementedError will be + # thrown before reach here. This is kept here because property function requires + # a return value. + return self._query_compiler.default_to_pandas( + lambda df: pandas.DataFrame.resample(df, **self.resample_kwargs).groups + ) + + @property + def indices(self): # pragma: no cover + # TODO: SNOW-1063368: Modin upgrade - modin.pandas.resample.Resample + self._method_not_implemented("indices") + # Same as groups, keeps the return because indices requires return value + return self._query_compiler.default_to_pandas( + lambda df: pandas.DataFrame.resample(df, **self.resample_kwargs).indices + ) + + def get_group(self, name, obj=None): # pragma: no cover + # TODO: SNOW-1063368: Modin upgrade - modin.pandas.resample.Resample + self._method_not_implemented("get_group") + + def apply( + self, func: Optional[AggFuncType] = None, *args: Any, **kwargs: Any + ): # pragma: no cover + # TODO: SNOW-1063368: Modin upgrade - modin.pandas.resample.Resample + self._method_not_implemented("aggregate") + + def aggregate( + self, func: Optional[AggFuncType] = None, *args: Any, **kwargs: Any + ): # pragma: no cover + # TODO: SNOW-1063368: Modin upgrade - modin.pandas.resample.Resample + self._method_not_implemented("aggregate") + + agg = aggregate + + def transform( + self, + arg: Union[Callable[..., T], tuple[Callable[..., T], str]], + *args: Any, + **kwargs: Any, + ): # pragma: no cover + # TODO: SNOW-1063368: Modin upgrade - modin.pandas.resample.Resample + self._method_not_implemented("transform") + + def pipe( + self, + func: Union[Callable[..., T], tuple[Callable[..., T], str]], + *args: Any, + **kwargs: Any, + ): # pragma: no cover + # TODO: SNOW-1063368: Modin upgrade - modin.pandas.resample.Resample + self._method_not_implemented("pipe") + + def ffill(self, limit: Optional[int] = None) -> Union[pd.DataFrame, pd.Series]: + is_series = not self._dataframe._is_dataframe + + if limit is not None: + ErrorMessage.not_implemented( + "Parameter limit of resample.ffill has not been implemented." 
+ ) + + return self._dataframe.__constructor__( + query_compiler=self._query_compiler.resample( + self.resample_kwargs, + "ffill", + (), + {}, + is_series, + ) + ) + + def backfill(self, limit: Optional[int] = None): + self._method_not_implemented("backfill") # pragma: no cover + + def bfill(self, limit: Optional[int] = None): # pragma: no cover + self._method_not_implemented("bfill") + + def pad(self, limit: Optional[int] = None): # pragma: no cover + self._method_not_implemented("pad") + + def nearest(self, limit: Optional[int] = None): # pragma: no cover + self._method_not_implemented("nearest") + + def fillna(self, method, limit: Optional[int] = None): # pragma: no cover + self._method_not_implemented("fillna") + + def asfreq(self, fill_value: Optional[Any] = None): # pragma: no cover + self._method_not_implemented("asfreq") + + def interpolate( + self, + method: InterpolateOptions = "linear", + *, + axis: Axis = 0, + limit: Optional[int] = None, + inplace: bool = False, + limit_direction: Literal["forward", "backward", "both"] = "forward", + limit_area: Optional[Literal["inside", "outside"]] = None, + downcast: Optional[Literal["infer"]] = None, + **kwargs, + ): # pragma: no cover + self._method_not_implemented("interpolate") + + def count(self) -> Union[pd.DataFrame, pd.Series]: + # TODO: SNOW-1063368: Modin upgrade - modin.pandas.resample.Resample + is_series = not self._dataframe._is_dataframe + + return self._dataframe.__constructor__( + query_compiler=self._query_compiler.resample( + self.resample_kwargs, + "count", + tuple(), + dict(), + is_series, + ) + ) + + def nunique(self, *args: Any, **kwargs: Any): # pragma: no cover + # TODO: SNOW-1063368: Modin upgrade - modin.pandas.resample.Resample + self._method_not_implemented("nunique") + + def first( + self, + numeric_only: Union[bool, lib.NoDefault] = lib.no_default, + min_count: int = 0, + *args: Any, + **kwargs: Any, + ): # pragma: no cover + # TODO: SNOW-1063368: Modin upgrade - modin.pandas.resample.Resample + self._method_not_implemented("first") + + def last( + self, + numeric_only: Union[bool, lib.NoDefault] = lib.no_default, + min_count: int = 0, + *args: Any, + **kwargs: Any, + ): # pragma: no cover + # TODO: SNOW-1063368: Modin upgrade - modin.pandas.resample.Resample + self._method_not_implemented("last") + + def max( + self, + numeric_only: bool = False, + min_count: int = 0, + *args: Any, + **kwargs: Any, + ) -> Union[pd.DataFrame, pd.Series]: + self._validate_numeric_only_for_aggregate_methods(numeric_only) + WarningMessage.warning_if_engine_args_is_set("resample_max", args, kwargs) + + agg_kwargs = dict(numeric_only=numeric_only, min_count=min_count) + is_series = not self._dataframe._is_dataframe + + return self._dataframe.__constructor__( + query_compiler=self._query_compiler.resample( + self.resample_kwargs, + "max", + tuple(), + agg_kwargs, + is_series, + ) + ) + + def mean( + self, + numeric_only: bool = False, + *args: Any, + **kwargs: Any, + ) -> Union[pd.DataFrame, pd.Series]: + # TODO: SNOW-1063368: Modin upgrade - modin.pandas.resample.Resample + self._validate_numeric_only_for_aggregate_methods(numeric_only) + WarningMessage.warning_if_engine_args_is_set("resample_mean", args, kwargs) + + agg_kwargs = dict(numeric_only=numeric_only) + is_series = not self._dataframe._is_dataframe + + return self._dataframe.__constructor__( + query_compiler=self._query_compiler.resample( + self.resample_kwargs, + "mean", + tuple(), + agg_kwargs, + is_series, + ) + ) + + def median( + self, + numeric_only: bool = False, + 
*args: Any, + **kwargs: Any, + ) -> Union[pd.DataFrame, pd.Series]: + # TODO: SNOW-1063368: Modin upgrade - modin.pandas.resample.Resample + self._validate_numeric_only_for_aggregate_methods(numeric_only) + WarningMessage.warning_if_engine_args_is_set("resample_median", args, kwargs) + + agg_kwargs = dict(numeric_only=numeric_only) + is_series = not self._dataframe._is_dataframe + + return self._dataframe.__constructor__( + query_compiler=self._query_compiler.resample( + self.resample_kwargs, + "median", + tuple(), + agg_kwargs, + is_series, + ) + ) + + def min( + self, + numeric_only: bool = False, + min_count: int = 0, + *args: Any, + **kwargs: Any, + ) -> Union[pd.DataFrame, pd.Series]: + # TODO: SNOW-1063368: Modin upgrade - modin.pandas.resample.Resample + self._validate_numeric_only_for_aggregate_methods(numeric_only) + WarningMessage.warning_if_engine_args_is_set("resample_min", args, kwargs) + + agg_kwargs = dict(numeric_only=numeric_only, min_count=min_count) + is_series = not self._dataframe._is_dataframe + + return self._dataframe.__constructor__( + query_compiler=self._query_compiler.resample( + self.resample_kwargs, + "min", + tuple(), + agg_kwargs, + is_series, + ) + ) + + def ohlc(self, *args: Any, **kwargs: Any): # pragma: no cover + # TODO: SNOW-1063368: Modin upgrade - modin.pandas.resample.Resample + self._method_not_implemented("ohlc") + + def prod( + self, + numeric_only: Union[bool, lib.NoDefault] = lib.no_default, + min_count: int = 0, + *args: Any, + **kwargs: Any, + ): # pragma: no cover + # TODO: SNOW-1063368: Modin upgrade - modin.pandas.resample.Resample + self._method_not_implemented("prod") + + def size(self): # pragma: no cover + # TODO: SNOW-1063368: Modin upgrade - modin.pandas.resample.Resample + self._method_not_implemented("size") + + def sem( + self, + ddof: int = 1, + numeric_only: Union[bool, lib.NoDefault] = lib.no_default, + *args: Any, + **kwargs: Any, + ): # pragma: no cover + # TODO: SNOW-1063368: Modin upgrade - modin.pandas.resample.Resample + self._method_not_implemented("sem") + + def std( + self, + ddof: int = 1, + numeric_only: bool = False, + *args: Any, + **kwargs: Any, + ) -> Union[pd.DataFrame, pd.Series]: + # TODO: SNOW-1063368: Modin upgrade - modin.pandas.resample.Resample + self._validate_numeric_only_for_aggregate_methods(numeric_only) + WarningMessage.warning_if_engine_args_is_set("resample_std", args, kwargs) + + agg_kwargs = dict(numeric_only=numeric_only, ddof=ddof) + is_series = not self._dataframe._is_dataframe + + return self._dataframe.__constructor__( + query_compiler=self._query_compiler.resample( + self.resample_kwargs, + "std", + tuple(), + agg_kwargs, + is_series, + ) + ) + + def sum( + self, + numeric_only: bool = False, + min_count: int = 0, + *args: Any, + **kwargs: Any, + ) -> Union[pd.DataFrame, pd.Series]: + # TODO: SNOW-1063368: Modin upgrade - modin.pandas.resample.Resample + self._validate_numeric_only_for_aggregate_methods(numeric_only) + WarningMessage.warning_if_engine_args_is_set("resample_sum", args, kwargs) + + agg_kwargs = dict(numeric_only=numeric_only, min_count=min_count) + is_series = not self._dataframe._is_dataframe + + return self._dataframe.__constructor__( + query_compiler=self._query_compiler.resample( + self.resample_kwargs, + "sum", + tuple(), + agg_kwargs, + is_series, + ) + ) + + def var( + self, + ddof: int = 1, + numeric_only: Union[bool, lib.NoDefault] = lib.no_default, + *args: Any, + **kwargs: Any, + ) -> Union[pd.DataFrame, pd.Series]: + # TODO: SNOW-1063368: Modin upgrade - 
modin.pandas.resample.Resample + self._validate_numeric_only_for_aggregate_methods(numeric_only) + WarningMessage.warning_if_engine_args_is_set("resample_var", args, kwargs) + + agg_kwargs = dict(numeric_only=numeric_only, ddof=ddof) + is_series = not self._dataframe._is_dataframe + + return self._dataframe.__constructor__( + query_compiler=self._query_compiler.resample( + self.resample_kwargs, + "var", + tuple(), + agg_kwargs, + is_series, + ) + ) + + def quantile( + self, q: Union[float, AnyArrayLike] = 0.5, **kwargs: Any + ): # pragma: no cover + # TODO: SNOW-1063368: Modin upgrade - modin.pandas.resample.Resample + self._method_not_implemented("quantile") diff --git a/src/snowflake/snowpark/modin/pandas/series.py b/src/snowflake/snowpark/modin/pandas/series.py new file mode 100644 index 00000000000..2e52624814c --- /dev/null +++ b/src/snowflake/snowpark/modin/pandas/series.py @@ -0,0 +1,2723 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +# Code in this file may constitute partial or total reimplementation, or modification of +# existing code originally distributed by the Modin project, under the Apache License, +# Version 2.0. 
+ +"""Module houses `Series` class, that is distributed version of `pandas.Series`.""" + +from __future__ import annotations + +from collections.abc import Hashable, Mapping, Sequence +from logging import getLogger +from typing import IO, TYPE_CHECKING, Any, Callable, Literal + +import numpy as np +import numpy.typing as npt +import pandas +from pandas._libs.lib import NoDefault, is_integer, no_default +from pandas._typing import ( + AggFuncType, + AnyArrayLike, + Axis, + FillnaOptions, + IgnoreRaise, + IndexKeyFunc, + IndexLabel, + Level, + Renamer, + Scalar, +) +from pandas.api.types import is_datetime64_any_dtype, is_string_dtype +from pandas.core.common import apply_if_callable, is_bool_indexer +from pandas.core.dtypes.common import is_bool_dtype, is_dict_like, is_list_like +from pandas.core.series import _coerce_method +from pandas.errors import SpecificationError +from pandas.util._validators import validate_bool_kwarg + +from snowflake.snowpark.modin.pandas.accessor import CachedAccessor, SparseAccessor +from snowflake.snowpark.modin.pandas.base import _ATTRS_NO_LOOKUP, BasePandasDataset +from snowflake.snowpark.modin.pandas.iterator import PartitionIterator +from snowflake.snowpark.modin.pandas.utils import from_pandas, is_scalar +from snowflake.snowpark.modin.plugin._typing import DropKeep, ListLike +from snowflake.snowpark.modin.plugin.utils.error_message import ErrorMessage +from snowflake.snowpark.modin.plugin.utils.warning_message import WarningMessage +from snowflake.snowpark.modin.utils import ( + MODIN_UNNAMED_SERIES_LABEL, + _inherit_docstrings, +) + +if TYPE_CHECKING: + from snowflake.snowpark.modin.pandas.dataframe import DataFrame # pragma: no cover + +# add this line to enable doc tests to run +from snowflake.snowpark.modin import pandas as pd # noqa: F401 + +logger = getLogger(__name__) + +SERIES_SETITEM_LIST_LIKE_KEY_AND_RANGE_LIKE_VALUE_ERROR_MESSAGE = ( + "Currently do not support Series or list-like keys with range-like values" +) + +SERIES_SETITEM_SLICE_AS_SCALAR_VALUE_ERROR_MESSAGE = ( + "Currently do not support assigning a slice value as if it's a scalar value" +) + +SERIES_SETITEM_INCOMPATIBLE_INDEXER_WITH_SERIES_ERROR_MESSAGE = ( + "Snowpark pandas DataFrame cannot be used as an indexer with Series" +) + +SERIES_SETITEM_INCOMPATIBLE_INDEXER_WITH_SCALAR_ERROR_MESSAGE = ( + "Scalar key incompatible with {} value" +) + +# Dictionary of extensions assigned to this class +_SERIES_EXTENSIONS_ = {} + + +@_inherit_docstrings( + pandas.Series, + excluded=[ + pandas.Series.flags, + pandas.Series.info, + pandas.Series.prod, + pandas.Series.product, + pandas.Series.reindex, + pandas.Series.fillna, + ], + apilink="pandas.Series", +) +class Series(BasePandasDataset): + _pandas_class = pandas.Series + __array_priority__ = pandas.Series.__array_priority__ + + def __init__( + self, + data=None, + index=None, + dtype=None, + name=None, + copy=False, + fastpath=False, + query_compiler=None, + ) -> None: + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + # Siblings are other dataframes that share the same query compiler. We + # use this list to update inplace when there is a shallow copy. + self._siblings = [] + + # modified: + # Engine.subscribe(_update_engine) + + if isinstance(data, type(self)): + query_compiler = data._query_compiler.copy() + if index is not None: + if any(i not in data.index for i in index): + ErrorMessage.not_implemented( + "Passing non-existent columns or index values to constructor " + + "not yet implemented." 
+ ) # pragma: no cover + query_compiler = data.loc[index]._query_compiler + if query_compiler is None: + # Defaulting to pandas + if name is None: + name = MODIN_UNNAMED_SERIES_LABEL + if ( + isinstance(data, (pandas.Series, pandas.Index)) + and data.name is not None + ): + name = data.name + + query_compiler = from_pandas( + pandas.DataFrame( + pandas.Series( + data=data, + index=index, + dtype=dtype, + name=name, + copy=copy, + fastpath=fastpath, + ) + ) + )._query_compiler + self._query_compiler = query_compiler.columnarize() + if name is not None: + self.name = name + + def _get_name(self): + """ + Get the value of the `name` property. + + Returns + ------- + hashable + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + name = self._query_compiler.columns[0] + if name == MODIN_UNNAMED_SERIES_LABEL: + return None + return name + + def _set_name(self, name): + """ + Set the value of the `name` property. + + Parameters + ---------- + name : hashable + Name value to set. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + if name is None: + name = MODIN_UNNAMED_SERIES_LABEL + if isinstance(name, tuple): + columns = pd.MultiIndex.from_tuples(tuples=[name]) + else: + columns = [name] + self._update_inplace( + new_query_compiler=self._query_compiler.set_columns(columns) + ) + + name = property(_get_name, _set_name) + _parent = None + # Parent axis denotes axis that was used to select series in a parent dataframe. + # If _parent_axis == 0, then it means that index axis was used via df.loc[row] + # indexing operations and assignments should be done to rows of parent. + # If _parent_axis == 1 it means that column axis was used via df[column] and assignments + # should be done to columns of parent. + _parent_axis = 0 + + def __add__(self, right): + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return self.add(right) + + def __radd__(self, left): + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return self.radd(left) + + def __and__(self, other): + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return super().__and__(other) + + def __rand__(self, other): + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return super().__rand__(other) + + # add `_inherit_docstrings` decorator to force method link addition. + @_inherit_docstrings(pandas.Series.__array__, apilink="pandas.Series.__array__") + def __array__(self, dtype=None): # noqa: PR01, RT01, D200 + """ + Return the values as a NumPy array. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return super().__array__(dtype).flatten() + + def __contains__(self, key): + """ + Check if `key` in the `Series.index`. + + Parameters + ---------- + key : hashable + Key to check the presence in the index. + + Returns + ------- + bool + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return key in self.index + + def __copy__(self, deep=True): + """ + Return the copy of the Series. + + Parameters + ---------- + deep : bool, default: True + Whether the copy should be deep or not. + + Returns + ------- + Series + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return self.copy(deep=deep) + + def __deepcopy__(self, memo=None): + """ + Return the deep copy of the Series. + + Parameters + ---------- + memo : Any, optional + Deprecated parameter. 
+ + Returns + ------- + Series + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return self.copy(deep=True) + + def __delitem__(self, key): + """ + Delete item identified by `key` label. + + Parameters + ---------- + key : hashable + Key to delete. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + if key not in self.keys(): + raise KeyError(key) + self.drop(labels=key, inplace=True) + + def __divmod__(self, right): + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return self.divmod(right) + + def __rdivmod__(self, left): + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return self.rdivmod(left) + + def __floordiv__(self, right): + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return self.floordiv(right) + + def __rfloordiv__(self, right): + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return self.rfloordiv(right) + + def __getattr__(self, key): + """ + Return item identified by `key`. + + Parameters + ---------- + key : hashable + Key to get. + + Returns + ------- + Any + + Notes + ----- + First try to use `__getattribute__` method. If it fails + try to get `key` from `Series` fields. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + try: + return object.__getattribute__(self, key) + except AttributeError as err: + if key not in _ATTRS_NO_LOOKUP: + try: + value = self[key] + if isinstance(value, Series) and value.empty: + raise err + return value + except Exception: + # We want to raise err if self[key] raises any kind of exception + raise err + raise err + + __float__ = _coerce_method(float) + __int__ = _coerce_method(int) + + def abs(self): + """ + Return a Series with absolute numeric value of each element. + + Returns + ------- + Series + + Examples + -------- + >>> ser = pd.Series([1, -2.29, 3, -4.77]) + >>> ser + 0 1.00 + 1 -2.29 + 2 3.00 + 3 -4.77 + dtype: float64 + + >>> abs(ser) + 0 1.00 + 1 2.29 + 2 3.00 + 3 4.77 + dtype: float64 + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return super().abs() + + def __neg__(self): + """ + Returns a Series with the sign changed for each element. + + Returns + ------- + Series + + Examples + -------- + >>> ser = pd.Series([1, -2.29, 3, -4.77]) + >>> ser + 0 1.00 + 1 -2.29 + 2 3.00 + 3 -4.77 + dtype: float64 + + >>> - ser + 0 -1.00 + 1 2.29 + 2 -3.00 + 3 4.77 + dtype: float64 + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return super().__neg__() + + def __iter__(self): + """ + Return an iterator of the values. 
+ + Returns + ------- + iterable + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return self._to_pandas().__iter__() + + def __mod__(self, right): + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return self.mod(right) + + def __rmod__(self, left): + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return self.rmod(left) + + def __mul__(self, right): + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return self.mul(right) + + def __rmul__(self, left): + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return self.rmul(left) + + def __or__(self, other): + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return super().__or__(other) + + def __ror__(self, other): + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return super().__ror__(other) + + def __xor__(self, other): # pragma: no cover + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return super().__xor__(other) + + def __rxor__(self, other): # pragma: no cover + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return super().__rxor__(other) + + def __pow__(self, right): + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return self.pow(right) + + def __rpow__(self, left): + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return self.rpow(left) + + def __repr__(self): + """ + Return a string representation for a particular Series. + + Returns + ------- + str + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + num_rows = pandas.get_option("display.max_rows") or 60 + num_cols = pandas.get_option("display.max_columns") or 20 + + ( + row_count, + col_count, + temp_df, + ) = self._query_compiler.build_repr_df(num_rows, num_cols) + if isinstance(temp_df, pandas.DataFrame) and not temp_df.empty: + temp_df = temp_df.iloc[:, 0] + temp_str = repr(temp_df) + freq_str = ( + f"Freq: {temp_df.index.freqstr}, " + if isinstance(temp_df.index, pandas.DatetimeIndex) + else "" + ) + if self.name is not None: + name_str = f"Name: {str(self.name)}, " + else: + name_str = "" + if row_count > num_rows: + len_str = f"Length: {row_count}, " + else: + len_str = "" + dtype_str = "dtype: {}".format( + str(self.dtype) + ")" + if temp_df.empty + else temp_str.rsplit("dtype: ", 1)[-1] + ) + if row_count == 0: + return f"Series([], {freq_str}{name_str}{dtype_str}" + maxsplit = 1 + if ( + isinstance(temp_df, pandas.Series) + and temp_df.name is not None + and temp_df.dtype == "category" + ): + maxsplit = 2 + return temp_str.rsplit("\n", maxsplit)[0] + "\n{}{}{}{}".format( + freq_str, name_str, len_str, dtype_str + ) + + def __round__(self, decimals=0): + """ + Round each value in a Series to the given number of decimals. + + Parameters + ---------- + decimals : int, default: 0 + Number of decimal places to round to. + + Returns + ------- + Series + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return super().round(decimals) + + def __setitem__(self, key, value): + """ + Set `value` identified by `key` in the Series. + + Parameters + ---------- + key : hashable + Key to set. + value : Any + Value to set. + + Examples + -------- + Using the following series to set values on. __setitem__ is an inplace operation, so copies of `series`are made + in the examples to highlight the different behaviors produced. 
+ >>> series = pd.Series([1, "b", 3], index=["a", "b", "c"]) + + Using a scalar as the value to set a particular element. + >>> s = series.copy() + >>> s["c"] = "a" + >>> s + a 1 + b b + c a + dtype: object + + Using list-like objects as the key and value to set multiple elements. + >>> s = series.copy() + >>> s[["c", "a"]] = ["foo", "bar"] + >>> s # doctest: +SKIP + a bar + b 2 + c foo + dtype: object + + Having a duplicate label in the key. + >>> s = series.copy() + >>> s[["c", "a", "c"]] = pd.Index(["foo", "bar", "baz"]) + >>> s # doctest: +SKIP + a bar + b 2 + c baz + dtype: object + + When using a Series as the value, its index does not matter. + >>> s = series.copy() # doctest: +SKIP + >>> s[["a", "b"]] = pd.Series([9, 8], index=["foo", "bar"]) + >>> s # doctest: +SKIP + a 9 + b 8 + c 3 + dtype: int64 + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + key = apply_if_callable(key, self) + + # Error Checking: + # Currently do not support Series[scalar key] = Series item/DataFrame item since this results in a nested series + # or df. + if is_scalar(key) and isinstance(value, BasePandasDataset): + raise ValueError( + SERIES_SETITEM_INCOMPATIBLE_INDEXER_WITH_SCALAR_ERROR_MESSAGE.format( + "Snowpark pandas " + value.__class__.__name__ + if isinstance(value, BasePandasDataset) + else value.__class__.__name__ + ) + ) + if isinstance(key, pd.DataFrame): + raise ValueError( + SERIES_SETITEM_INCOMPATIBLE_INDEXER_WITH_SERIES_ERROR_MESSAGE + ) + elif (isinstance(key, pd.Series) or is_list_like(key)) and ( + isinstance(value, range) + ): + raise NotImplementedError( + SERIES_SETITEM_LIST_LIKE_KEY_AND_RANGE_LIKE_VALUE_ERROR_MESSAGE + ) + elif isinstance(value, slice): + # Here, the whole slice is assigned as a scalar variable, i.e., a spot at an index gets a slice value. + raise NotImplementedError( + SERIES_SETITEM_SLICE_AS_SCALAR_VALUE_ERROR_MESSAGE + ) + + if isinstance(key, (slice, range)): + if (key.start is None or is_integer(key.start)) and ( # pragma: no cover + key.stop is None or is_integer(key.stop) + ): + # integer slice behaves the same as iloc slice + self.iloc[key] = value # pragma: no cover + else: + # TODO: SNOW-976232 once the slice test is added to test_setitem, code here should be covered. + self.loc[key] = value # pragma: no cover + + elif isinstance(value, Series): + # If value is a Series, value's index doesn't matter/is ignored. However, loc setitem matches the key's + # index with value's index. To emulate this behavior, treat the Series as if it is matching by position. + # + # For example, + # With __setitem__, the index of value does not matter. + # >>> series = pd.Series([1, 2, 3], index=["a", "b", "c"]) + # >>> series[["a", "b"]] = pd.Series([9, 8]) + # a 9 + # b 8 + # c 3 + # dtype: int64 + # value = pd.Series([9, 8], index=["foo", "bar"]) also produces same result as above. + # + # However, with loc setitem, index matters. + # >>> series.loc[["a", "b"]] = pd.Series([9, 8]) + # a NaN + # b NaN + # c 3.0 + # dtype: float64 + # + # >>> series.loc[["a", "b"]] = pd.Series([9, 8], index=["a", "b"]) + # a 9 + # b 8 + # c 3 + # dtype: int64 + # Due to the behavior above, loc setitem can work with any kind of value regardless of length. + # With __setitem__, the length of the value must match length of the key. Currently, loc setitem can + # handle this with boolean keys. + + # Convert list-like keys to Series. 
+ if not isinstance(key, pd.Series) and is_list_like(key): + key = pd.Series(key) + + index_is_bool_indexer = False + + if isinstance(key, pd.Series) and is_bool_dtype(key.dtype): + index_is_bool_indexer = True # pragma: no cover + elif is_bool_indexer(key): + index_is_bool_indexer = True # pragma: no cover + + new_qc = self._query_compiler.set_2d_labels( + key._query_compiler if isinstance(key, BasePandasDataset) else key, + slice(None), # column key is not applicable to Series objects + value._query_compiler, + matching_item_columns_by_label=False, + matching_item_rows_by_label=False, + index_is_bool_indexer=index_is_bool_indexer, + ) + self._update_inplace(new_query_compiler=new_qc) + + else: + self.loc[key] = value + + def __sub__(self, right): + return self.sub(right) + + def __rsub__(self, left): + return self.rsub(left) + + def __truediv__(self, right): + return self.truediv(right) + + def __rtruediv__(self, left): + return self.rtruediv(left) + + __iadd__ = __add__ + __imul__ = __add__ + __ipow__ = __pow__ + __isub__ = __sub__ + __itruediv__ = __truediv__ + + def add(self, other, level=None, fill_value=None, axis=0): # noqa: PR01, RT01, D200 + """ + Return Addition of series and other, element-wise (binary operator add). + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return super().add(other, level=level, fill_value=fill_value, axis=axis) + + def radd( + self, other, level=None, fill_value=None, axis=0 + ): # noqa: PR01, RT01, D200 + """ + Return Addition of series and other, element-wise (binary operator radd). + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return super().radd(other, level=level, fill_value=fill_value, axis=axis) + + def add_prefix(self, prefix): # noqa: PR01, RT01, D200 + """ + Prefix labels with string `prefix`. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + # pandas converts non-string prefix values into str and adds it to the index labels. + return self.__constructor__( + query_compiler=self._query_compiler.add_substring( + str(prefix), substring_type="prefix", axis=0 + ) + ) + + def add_suffix(self, suffix): + """ + Suffix labels with string `suffix`. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + # pandas converts non-string suffix values into str and appends it to the index labels. + return self.__constructor__( + query_compiler=self._query_compiler.add_substring( + str(suffix), substring_type="suffix", axis=0 + ) + ) + + def drop( + self, + labels: IndexLabel = None, + axis: Axis = 0, + index: IndexLabel = None, + columns: IndexLabel = None, + level: Level | None = None, + inplace: bool = False, + errors: IgnoreRaise = "raise", + ) -> Series | None: + """ + Drop specified labels from `BasePandasDataset`. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return super().drop( + labels=labels, + axis=axis, + index=index, + columns=columns, + level=level, + inplace=inplace, + errors=errors, + ) + + def aggregate( + self, func: AggFuncType = None, axis: Axis = 0, *args: Any, **kwargs: Any + ): + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + if is_dict_like(func): + raise SpecificationError( + "Value for func argument in dict format is not allowed for Series aggregate." + ) + + return super().aggregate(func, axis, *args, **kwargs) + + agg = aggregate + + def apply( + self, + func: AggFuncType, + convert_dtype: bool = True, + args: tuple[Any, ...] 
= (), + **kwargs: Any, + ): + """ + Apply a function along an axis of the `BasePandasDataset`. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + self._validate_function(func) + new_query_compiler = self._query_compiler.apply_on_series(func, args, **kwargs) + + if convert_dtype: + # TODO SNOW-810614: call convert_dtypes for consistency + WarningMessage.ignored_argument( + operation="apply", + argument="convert_dtype", + message="convert_dtype is ignored in Snowflake backend", + ) + + return self.__constructor__(query_compiler=new_query_compiler) + + def argmax(self, axis=None, skipna=True, *args, **kwargs): # noqa: PR01, RT01, D200 + """ + Return int position of the largest value in the Series. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + ErrorMessage.not_implemented() + result = self.idxmax(axis=axis, skipna=skipna, *args, **kwargs) + if np.isnan(result) or result is pandas.NA: + result = -1 + return result + + def argmin(self, axis=None, skipna=True, *args, **kwargs): # noqa: PR01, RT01, D200 + """ + Return int position of the smallest value in the Series. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + ErrorMessage.not_implemented() + result = self.idxmin(axis=axis, skipna=skipna, *args, **kwargs) + if np.isnan(result) or result is pandas.NA: + result = -1 + return result + + def argsort(self, axis=0, kind="quicksort", order=None): # noqa: PR01, RT01, D200 + """ + Return the integer indices that would sort the Series values. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + ErrorMessage.not_implemented() + return self._default_to_pandas( + pandas.Series.argsort, axis=axis, kind=kind, order=order + ) + + def autocorr(self, lag=1): # noqa: PR01, RT01, D200 + """ + Compute the lag-N autocorrelation. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + ErrorMessage.not_implemented() + return self.corr(self.shift(lag)) + + def between(self, left, right, inclusive: str = "both"): # noqa: PR01, RT01, D200 + """ + Return boolean Series equivalent to left <= series <= right. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + ErrorMessage.not_implemented() + return self._default_to_pandas( + pandas.Series.between, left, right, inclusive=inclusive + ) + + def compare( + self, + other: Series, + align_axis: str | int = 1, + keep_shape: bool = False, + keep_equal: bool = False, + result_names: tuple = ("self", "other"), + ) -> Series: # noqa: PR01, RT01, D200 + """ + Compare to another Series and show the differences. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + ErrorMessage.not_implemented() + if not isinstance(other, Series): + raise TypeError(f"Cannot compare Series to {type(other)}") + result = self.to_frame().compare( + other.to_frame(), + align_axis=align_axis, + keep_shape=keep_shape, + keep_equal=keep_equal, + result_names=result_names, + ) + if align_axis == "columns" or align_axis == 1: + # pandas.DataFrame.Compare returns a dataframe with a multidimensional index object as the + # columns so we have to change column object back. + result.columns = pandas.Index(["self", "other"]) + else: + result = result.squeeze().rename(None) + return result + + def corr(self, other, method="pearson", min_periods=None): # noqa: PR01, RT01, D200 + """ + Compute correlation with `other` Series, excluding missing values. 
+ """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + ErrorMessage.not_implemented() + if method == "pearson": + this, other = self.align(other, join="inner", copy=False) + this = self.__constructor__(this) + other = self.__constructor__(other) + + if len(this) == 0: + return np.nan + if len(this) != len(other): + raise ValueError("Operands must have same size") + + if min_periods is None: + min_periods = 1 + + valid = this.notna() & other.notna() + if not valid.all(): + this = this[valid] + other = other[valid] + if len(this) < min_periods: + return np.nan + + this = this.astype(dtype="float64") + other = other.astype(dtype="float64") + this -= this.mean() + other -= other.mean() + + other = other.__constructor__(query_compiler=other._query_compiler.conj()) + result = this * other / (len(this) - 1) + result = np.array([result.sum()]) + + stddev_this = ((this * this) / (len(this) - 1)).sum() + stddev_other = ((other * other) / (len(other) - 1)).sum() + + stddev_this = np.array([np.sqrt(stddev_this)]) + stddev_other = np.array([np.sqrt(stddev_other)]) + + result /= stddev_this * stddev_other + + np.clip(result.real, -1, 1, out=result.real) + if np.iscomplexobj(result): + np.clip(result.imag, -1, 1, out=result.imag) + return result[0] + + return self.__constructor__( + query_compiler=self._query_compiler.default_to_pandas( + pandas.Series.corr, + other._query_compiler, + method=method, + min_periods=min_periods, + ) + ) + + def count(self): + return super().count() + + def cov( + self, other, min_periods=None, ddof: int | None = 1 + ): # noqa: PR01, RT01, D200 + """ + Compute covariance with Series, excluding missing values. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + ErrorMessage.not_implemented() + this, other = self.align(other, join="inner", copy=False) + this = self.__constructor__(this) + other = self.__constructor__(other) + if len(this) == 0: + return np.nan + + if len(this) != len(other): + raise ValueError("Operands must have same size") + + if min_periods is None: + min_periods = 1 + + valid = this.notna() & other.notna() + if not valid.all(): + this = this[valid] + other = other[valid] + + if len(this) < min_periods: + return np.nan + + this = this.astype(dtype="float64") + other = other.astype(dtype="float64") + + this -= this.mean() + other -= other.mean() + + other = other.__constructor__(query_compiler=other._query_compiler.conj()) + result = this * other / (len(this) - ddof) + result = result.sum() + return result + + def describe( + self, + percentiles: ListLike | None = None, + include: ListLike | Literal["all"] | None = None, + exclude: ListLike | None = None, + ) -> Series: + """ + Generate descriptive statistics. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return super().describe( + percentiles=percentiles, + include=None, + exclude=None, + ) + + def diff(self, periods: int = 1): + """ + First discrete difference of element. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return super().diff(periods=periods, axis=0) + + def divmod( + self, other, level=None, fill_value=None, axis=0 + ): # noqa: PR01, RT01, D200 + """ + Return Integer division and modulo of series and `other`, element-wise (binary operator `divmod`). 
+ Not implemented + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + ErrorMessage.not_implemented() + + def dot(self, other): # noqa: PR01, RT01, D200 + """ + Compute the dot product between the Series and the columns of `other`. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + ErrorMessage.not_implemented() # pragma: no cover + + if isinstance(other, BasePandasDataset): + common = self.index.union(other.index) + if len(common) > len(self) or len(common) > len(other): # pragma: no cover + raise ValueError("Matrices are not aligned") + + if isinstance(other, Series): + return self._reduce_dimension( + query_compiler=self._query_compiler.dot( + other.reindex(index=common), squeeze_self=True + ) + ) + else: + return self.__constructor__( + query_compiler=self._query_compiler.dot( + other.reindex(index=common), squeeze_self=True + ) + ) + + other = np.asarray(other) + if self.shape[0] != other.shape[0]: + raise ValueError( + f"Dot product shape mismatch, {self.shape} vs {other.shape}" + ) + + if len(other.shape) > 1: + return ( + self._query_compiler.dot(other, squeeze_self=True).to_numpy().squeeze() + ) + + return self._reduce_dimension( + query_compiler=self._query_compiler.dot(other, squeeze_self=True) + ) + + def drop_duplicates(self, keep="first", inplace=False): # noqa: PR01, RT01, D200 + """ + Return Series with duplicate values removed. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return super().drop_duplicates(keep=keep, inplace=inplace) + + def dropna( + self, + *, + axis: Axis = 0, + inplace: bool = False, + how: str | NoDefault = no_default, + ): + """ + Return a new Series with missing values removed. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return super()._dropna(axis=axis, inplace=inplace, how=how) + + def duplicated(self, keep: DropKeep = "first"): + """ + Indicate duplicate Series values. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + name = self.name + series = self.to_frame().duplicated(keep=keep) + # we are using df.duplicated method for series but its result will lose the series name, so we preserve it here + series.name = name + return series + + def eq(self, other, level=None, fill_value=None, axis=0): # noqa: PR01, RT01, D200 + """ + Return Equal to of series and `other`, element-wise (binary operator `eq`). + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return super().eq(other, level=level, axis=axis) + + def equals(self, other): # noqa: PR01, RT01, D200 + """ + Test whether two objects contain the same elements. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + ErrorMessage.not_implemented() # pragma: no cover + + return ( + self.name == other.name + and self.index.equals(other.index) + and self.eq(other).all() + ) + + def explode(self, ignore_index: bool = False): # noqa: PR01, RT01, D200 + """ + Transform each element of a list-like to a row. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + ErrorMessage.not_implemented() + + return super().explode( + MODIN_UNNAMED_SERIES_LABEL if self.name is None else self.name, + ignore_index=ignore_index, + ) + + def factorize( + self, sort=False, na_sentinel=no_default, use_na_sentinel=no_default + ): # noqa: PR01, RT01, D200 + """ + Encode the object as an enumerated type or categorical variable. 
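# --- Editor's illustration, not part of the patch -----------------------------------
# Series.duplicated above round-trips through a one-column frame and then restores the
# name that the round trip loses. The observable behavior it aims to match, shown with
# stock pandas on made-up data:
import pandas as pd

s = pd.Series([1, 2, 2, 3, 1], name="vals")
dup = s.duplicated(keep="first")
assert dup.tolist() == [False, False, True, False, True]
assert dup.name == "vals"     # the name survives, which is why the hunk re-attaches it
# -------------------------------------------------------------------------------------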
+ """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + ErrorMessage.not_implemented() + return self._default_to_pandas( + pandas.Series.factorize, + sort=sort, + na_sentinel=na_sentinel, + use_na_sentinel=use_na_sentinel, + ) + + def fillna( + self, + value: Hashable | Mapping | Series = None, + *, + method: FillnaOptions | None = None, + axis: Axis | None = None, + inplace: bool = False, + limit: int | None = None, + downcast: dict | None = None, + ) -> Series | None: + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + if isinstance(value, BasePandasDataset) and not isinstance(value, Series): + raise TypeError( + '"value" parameter must be a scalar, dict or Series, but ' + + f'you passed a "{type(value).__name__}"' + ) + return super().fillna( + self_is_series=True, + value=value, + method=method, + axis=axis, + inplace=inplace, + limit=limit, + downcast=downcast, + ) + + def floordiv( + self, other, level=None, fill_value=None, axis=0 + ): # noqa: PR01, RT01, D200 + """ + Get Integer division of dataframe and `other`, element-wise (binary operator `floordiv`). + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return super().floordiv(other, level=level, fill_value=fill_value, axis=axis) + + def ge(self, other, level=None, fill_value=None, axis=0): # noqa: PR01, RT01, D200 + """ + Return greater than or equal to of series and `other`, element-wise (binary operator `ge`). + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return super().ge(other, level=level, axis=axis) + + def groupby( + self, + by=None, + axis: Axis = 0, + level: IndexLabel | None = None, + as_index: bool = True, + sort: bool = True, + group_keys: bool = True, + observed: bool | NoDefault = no_default, + dropna: bool = True, + ): + """ + Group Series using a mapper or by a Series of columns. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + from snowflake.snowpark.modin.pandas.groupby import ( + SeriesGroupBy, + validate_groupby_args, + ) + + validate_groupby_args(by, level, observed) + + if not as_index: + raise TypeError("as_index=False only valid with DataFrame") + + axis = self._get_axis_number(axis) + return SeriesGroupBy( + self, + by, + axis, + level, + as_index, + sort, + group_keys, + idx_name=None, + observed=observed, + dropna=dropna, + ) + + def gt(self, other, level=None, fill_value=None, axis=0): # noqa: PR01, RT01, D200 + """ + Return greater than of series and `other`, element-wise (binary operator `gt`). + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return super().gt(other, level=level, axis=axis) + + def hist( + self, + by=None, + ax=None, + grid=True, + xlabelsize=None, + xrot=None, + ylabelsize=None, + yrot=None, + figsize=None, + bins=10, + **kwds, + ): # noqa: PR01, RT01, D200 + """ + Draw histogram of the input series using matplotlib. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + ErrorMessage.not_implemented() + return self._default_to_pandas( + pandas.Series.hist, + by=by, + ax=ax, + grid=grid, + xlabelsize=xlabelsize, + xrot=xrot, + ylabelsize=ylabelsize, + yrot=yrot, + figsize=figsize, + bins=bins, + **kwds, + ) + + def idxmax(self, axis=0, skipna=True, *args, **kwargs): # noqa: PR01, RT01, D200 + """ + Return the row label of the maximum value. + + Parameters + ---------- + axis : {0 or 'index'} + Unused. Parameter needed for compatibility with DataFrame. 
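# --- Editor's illustration, not part of the patch -----------------------------------
# The fillna override above only rejects DataFrame fill values; scalars, dicts and
# Series stay valid, as in stock pandas. Made-up sketch of the accepted forms:
import pandas as pd

s = pd.Series([1.0, None, 3.0, None])
assert s.fillna(0).tolist() == [1.0, 0.0, 3.0, 0.0]                      # scalar fill
assert s.fillna({1: 10.0, 3: 30.0}).tolist() == [1.0, 10.0, 3.0, 30.0]   # dict keyed by label
try:
    s.fillna(pd.DataFrame({"a": [1]}))
except TypeError:
    pass    # '"value" parameter must be a scalar, dict or Series ...'
# -------------------------------------------------------------------------------------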
+ skipna : bool, default True + Exclude NA/null values. If an entire Series is NA, the result will be NA. + *args, **kwargs + Additional arguments and keywords have no effect but might be accepted for compatibility with NumPy. + + Returns + ------- + Index, the label of the maximum value. + + Examples + -------- + >>> s = pd.Series(data=[1, None, 4, 3, 4], + ... index=['A', 'B', 'C', 'D', 'E']) + >>> s.idxmax() + 'C' + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + if skipna is None: + skipna = True + return super().idxmax(axis=axis, skipna=skipna, *args, **kwargs) + + def idxmin(self, axis=0, skipna=True, *args, **kwargs): # noqa: PR01, RT01, D200 + """ + Return the row label of the minimum value. + + Parameters + ---------- + axis : {0 or 'index'} + Unused. Parameter needed for compatibility with DataFrame. + skipna : bool, default True + Exclude NA/null values. If an entire Series is NA, the result will be NA. + *args, **kwargs + Additional arguments and keywords have no effect but might be accepted for compatibility with NumPy. + + Returns + ------- + Index, the label of the minimum value. + + Examples + -------- + >>> s = pd.Series(data=[1, None, 4, 3, 4], + ... index=['A', 'B', 'C', 'D', 'E']) + >>> s.idxmin() + 'A' + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + if skipna is None: + skipna = True + return super().idxmin(axis=axis, skipna=skipna, *args, **kwargs) + + def info( + self, + verbose: bool | None = None, + buf: IO[str] | None = None, + max_cols: int | None = None, + memory_usage: bool | str | None = None, + show_counts: bool = True, + ): + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return self._default_to_pandas( + pandas.Series.info, + verbose=verbose, + buf=buf, + max_cols=max_cols, + memory_usage=memory_usage, + show_counts=show_counts, + ) + + def interpolate( + self, + method="linear", + axis=0, + limit=None, + inplace=False, + limit_direction: str | None = None, + limit_area=None, + downcast=None, + **kwargs, + ): # noqa: PR01, RT01, D200 + """ + Fill NaN values using an interpolation method. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + ErrorMessage.not_implemented() + return self._default_to_pandas( + pandas.Series.interpolate, + method=method, + axis=axis, + limit=limit, + inplace=inplace, + limit_direction=limit_direction, + limit_area=limit_area, + downcast=downcast, + **kwargs, + ) + + def item(self): # noqa: RT01, D200 + """ + Return the first element of the underlying data as a Python scalar. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + ErrorMessage.not_implemented() + return self[0] + + def items(self): # noqa: D200 + """ + Lazily iterate over (index, value) tuples. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + ErrorMessage.not_implemented() + + def item_builder(s): + return s.name, s.squeeze() + + partition_iterator = PartitionIterator(self.to_frame(), 0, item_builder) + yield from partition_iterator + + def keys(self): # noqa: RT01, D200 + """ + Return alias for index. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return self.index + + def kurt( + self, + axis: Axis | None | NoDefault = no_default, + skipna=True, + numeric_only=False, + **kwargs, + ): # noqa: PR01, RT01, D200 + """ + Return unbiased kurtosis over requested axis. 
+ """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + axis = self._get_axis_number(axis) + return super().kurt(axis, skipna, numeric_only, **kwargs) + + kurtosis = kurt + + def le(self, other, level=None, fill_value=None, axis=0): # noqa: PR01, RT01, D200 + """ + Return less than or equal to of series and `other`, element-wise (binary operator `le`). + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return super().le(other, level=level, axis=axis) + + def lt(self, other, level=None, fill_value=None, axis=0): # noqa: PR01, RT01, D200 + """ + Return less than of series and `other`, element-wise (binary operator `lt`). + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return super().lt(other, level=level, axis=axis) + + def map( + self, + arg: Callable | Mapping | Series, + na_action: Literal["ignore"] | None = None, + ) -> Series: + """ + Map values of Series according to input correspondence. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return self.__constructor__( + query_compiler=self._query_compiler.map(arg, na_action) + ) + + def mask( + self, + cond: DataFrame | Series | Callable | AnyArrayLike, + other: DataFrame | Series | Callable | Scalar | None = np.nan, + inplace: bool = False, + axis: Axis | None = None, + level: Level | None = None, + ): + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return super().mask( + cond, + other=other, + inplace=inplace, + axis=axis, + level=level, + ) + + def memory_usage(self, index=True, deep=False): # noqa: PR01, RT01, D200 + """ + Return the memory usage of the Series. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + ErrorMessage.not_implemented() # pragma: no cover + + if index: + result = self._reduce_dimension( + self._query_compiler.memory_usage(index=False, deep=deep) + ) + index_value = self.index.memory_usage(deep=deep) + return result + index_value + return super().memory_usage(index=index, deep=deep) + + def mod(self, other, level=None, fill_value=None, axis=0): # noqa: PR01, RT01, D200 + """ + Return Modulo of series and `other`, element-wise (binary operator `mod`). + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return super().mod(other, level=level, fill_value=fill_value, axis=axis) + + def mode(self, dropna=True): # noqa: PR01, RT01, D200 + """ + Return the mode(s) of the Series. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + ErrorMessage.not_implemented() + return super().mode(numeric_only=False, dropna=dropna) + + def mul(self, other, level=None, fill_value=None, axis=0): # noqa: PR01, RT01, D200 + """ + Return multiplication of series and `other`, element-wise (binary operator `mul`). + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return super().mul(other, level=level, fill_value=fill_value, axis=axis) + + multiply = mul + + def rmul( + self, other, level=None, fill_value=None, axis=0 + ): # noqa: PR01, RT01, D200 + """ + Return multiplication of series and `other`, element-wise (binary operator `mul`). + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return super().rmul(other, level=level, fill_value=fill_value, axis=axis) + + def ne(self, other, level=None, fill_value=None, axis=0): # noqa: PR01, RT01, D200 + """ + Return not equal to of series and `other`, element-wise (binary operator `ne`). 
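# --- Editor's illustration, not part of the patch -----------------------------------
# Series.map above forwards both the mapping and na_action to the query compiler. The
# behavior it mirrors, shown with stock pandas on made-up data:
import numpy as np
import pandas as pd

s = pd.Series(["cat", "dog", np.nan])
mapped = s.map({"cat": "kitten", "dog": "puppy"}, na_action="ignore")
assert mapped.tolist()[:2] == ["kitten", "puppy"]
assert pd.isna(mapped.iloc[2])    # missing input stays missing with na_action="ignore"
# -------------------------------------------------------------------------------------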
+ """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return super().ne(other, level=level, axis=axis) + + def nlargest(self, n=5, keep="first"): # noqa: PR01, RT01, D200 + """ + Return the largest `n` elements. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + ErrorMessage.not_implemented() + return self._default_to_pandas(pandas.Series.nlargest, n=n, keep=keep) + + def nsmallest(self, n=5, keep="first"): # noqa: PR01, RT01, D200 + """ + Return the smallest `n` elements. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + ErrorMessage.not_implemented() + return self.__constructor__( + query_compiler=self._query_compiler.nsmallest(n=n, keep=keep) + ) + + def set_axis( + self, + labels: IndexLabel, + *, + axis: Axis = 0, + copy: bool | NoDefault = no_default, # ignored + ): + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + if not is_scalar(axis): + raise TypeError(f"{type(axis).__name__} is not a valid type for axis.") + return super().set_axis( + labels=labels, + # 'rows', 'index, and 0 are valid axis values for Series. + # 'columns' and 1 are valid axis values only for DataFrame. + axis=pandas.Series._get_axis_name(axis), + copy=copy, + ) + + def unstack(self, level=-1, fill_value=None): # noqa: PR01, RT01, D200 + """ + Unstack, also known as pivot, Series with MultiIndex to produce DataFrame. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + ErrorMessage.not_implemented() + from snowflake.snowpark.modin.pandas.dataframe import DataFrame + + result = DataFrame( + query_compiler=self._query_compiler.unstack(level, fill_value) + ) + + return result.droplevel(0, axis=1) if result.columns.nlevels > 1 else result + + @property + def plot( + self, + kind="line", + ax=None, + figsize=None, + use_index=True, + title=None, + grid=None, + legend=False, + style=None, + logx=False, + logy=False, + loglog=False, + xticks=None, + yticks=None, + xlim=None, + ylim=None, + rot=None, + fontsize=None, + colormap=None, + table=False, + yerr=None, + xerr=None, + label=None, + secondary_y=False, + **kwds, + ): # noqa: PR01, RT01, D200 + """ + Make plot of Series. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + ErrorMessage.not_implemented() + return self._to_pandas().plot + + def pow(self, other, level=None, fill_value=None, axis=0): # noqa: PR01, RT01, D200 + """ + Return exponential power of series and `other`, element-wise (binary operator `pow`). 
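# --- Editor's illustration, not part of the patch -----------------------------------
# Series.unstack above pivots an index level into columns and drops the synthetic top
# column level when one appears. The stock-pandas behavior it targets, on made-up data:
import pandas as pd

idx = pd.MultiIndex.from_tuples([("a", "x"), ("a", "y"), ("b", "x"), ("b", "y")])
s = pd.Series([1, 2, 3, 4], index=idx)
wide = s.unstack()                   # level=-1: the inner level becomes the columns
assert list(wide.columns) == ["x", "y"]
assert wide.loc["a", "y"] == 2
# -------------------------------------------------------------------------------------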
+ """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return super().pow(other, level=level, fill_value=fill_value, axis=axis) + + def prod( + self, + axis=None, + skipna=True, + level=None, + numeric_only=False, + min_count=0, + **kwargs, + ): + ErrorMessage.not_implemented() + validate_bool_kwarg(skipna, "skipna", none_allowed=False) + axis = self._get_axis_number(axis) + if level is not None: + if ( + not self._query_compiler.has_multiindex(axis=axis) + and level > 0 + or level < -1 + and level != self.index.name + ): + raise ValueError("level > 0 or level < -1 only valid with MultiIndex") + return self.groupby(level=level, axis=axis, sort=False).prod( + numeric_only=numeric_only, min_count=min_count, **kwargs + ) + new_index = self.columns if axis else self.index + if min_count > len(new_index): + return np.nan + + data = self._validate_dtypes_sum_prod_mean(axis, numeric_only, ignore_axis=True) + if min_count > 1: + return data._reduce_dimension( + data._query_compiler.prod_min_count( + axis=axis, + skipna=skipna, + level=level, + numeric_only=numeric_only, + min_count=min_count, + **kwargs, + ) + ) + return data._reduce_dimension( + data._query_compiler.prod( + axis=axis, + skipna=skipna, + level=level, + numeric_only=numeric_only, + min_count=min_count, + **kwargs, + ) + ) + + product = prod + + def ravel(self, order="C"): # noqa: PR01, RT01, D200 + """ + Return the flattened underlying data as an ndarray. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + ErrorMessage.not_implemented() + data = self._query_compiler.to_numpy().flatten(order=order) + if isinstance(self.dtype, pandas.CategoricalDtype): + data = pandas.Categorical(data, dtype=self.dtype) + + return data + + def reindex(self, *args, **kwargs): + if args: + if len(args) > 1: + raise TypeError("Only one positional argument ('index') is allowed") + if "index" in kwargs: + raise TypeError( + "'index' passed as both positional and keyword argument" + ) + kwargs.update({"index": args[0]}) + index = kwargs.pop("index", None) + method = kwargs.pop("method", None) + level = kwargs.pop("level", None) + copy = kwargs.pop("copy", True) + limit = kwargs.pop("limit", None) + tolerance = kwargs.pop("tolerance", None) + fill_value = kwargs.pop("fill_value", None) + if kwargs: + raise TypeError( + "reindex() got an unexpected keyword " + + f'argument "{list(kwargs.keys())[0]}"' + ) + return super().reindex( + index=index, + columns=None, + method=method, + level=level, + copy=copy, + limit=limit, + tolerance=tolerance, + fill_value=fill_value, + ) + + def rename( + self, + index: Renamer | Hashable | None = None, + *, + axis: Axis | None = None, + copy: bool | None = None, + inplace: bool = False, + level: Level | None = None, + errors: IgnoreRaise = "ignore", + ) -> Series | None: + """ + Alter Series index labels or name. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + if axis is not None: + # make sure we raise if an invalid 'axis' is passed. + # note: axis is unused. It's needed for compatibility with DataFrame. 
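# --- Editor's illustration, not part of the patch -----------------------------------
# The prod override above returns NaN as soon as fewer than `min_count` valid values
# remain, the same contract as stock pandas, sketched here on made-up data:
import numpy as np
import pandas as pd

s = pd.Series([2.0, 3.0, np.nan])
assert s.prod() == 6.0                    # NaN is skipped by default
assert np.isnan(s.prod(min_count=3))      # only 2 valid values < min_count, so NaN
# -------------------------------------------------------------------------------------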
+ self._get_axis_number(axis) + + if copy is not None: + WarningMessage.ignored_argument( + operation="series.rename", + argument="copy", + message="copy parameter has been ignored with Snowflake execution engine", + ) + + if callable(index) or is_dict_like(index): + if isinstance(index, dict): + index = Series(index) + new_qc = self._query_compiler.rename( + index_renamer=index, level=level, errors=errors + ) + new_series = self._create_or_update_from_compiler( + new_query_compiler=new_qc, inplace=inplace + ) + if not inplace and hasattr(self, "name"): + new_series.name = self.name + return new_series + else: + # just change Series.name + if inplace: + self.name = index + else: + self_cp = self.copy() + self_cp.name = index + return self_cp + + def repeat(self, repeats, axis=None): # noqa: PR01, RT01, D200 + """ + Repeat elements of a Series. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + ErrorMessage.not_implemented() + if (isinstance(repeats, int) and repeats == 0) or ( + is_list_like(repeats) and len(repeats) == 1 and repeats[0] == 0 + ): + return self.__constructor__() + + return self.__constructor__(query_compiler=self._query_compiler.repeat(repeats)) + + def reset_index( + self, + level=None, + drop=False, + name=no_default, + inplace=False, + allow_duplicates=False, + ): + """ + Generate a new Series with the index reset. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + if name is no_default: + # For backwards compatibility, keep columns as [0] instead of + # [None] when self.name is None + name = 0 if self.name is None else self.name + + if not drop and inplace: + raise TypeError( + "Cannot reset_index inplace on a Series to create a DataFrame" + ) + else: + obj = self.copy() + obj.name = name + new_query_compiler = obj._query_compiler.reset_index( + drop=drop, + level=level, + col_level=0, + col_fill="", + allow_duplicates=allow_duplicates, + names=None, + ) + return self._create_or_update_from_compiler(new_query_compiler, inplace) + + def rdivmod( + self, other, level=None, fill_value=None, axis=0 + ): # noqa: PR01, RT01, D200 + """ + Return integer division and modulo of series and `other`, element-wise (binary operator `rdivmod`). + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + ErrorMessage.not_implemented() + + def rfloordiv( + self, other, level=None, fill_value=None, axis=0 + ): # noqa: PR01, RT01, D200 + """ + Return integer division of series and `other`, element-wise (binary operator `rfloordiv`). + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return super().rfloordiv(other, level=level, fill_value=fill_value, axis=axis) + + def rmod( + self, other, level=None, fill_value=None, axis=0 + ): # noqa: PR01, RT01, D200 + """ + Return modulo of series and `other`, element-wise (binary operator `rmod`). + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return super().rmod(other, level=level, fill_value=fill_value, axis=axis) + + def round(self, decimals=0, *args, **kwargs): # noqa: PR01, RT01, D200 + """ + Round each value in a Series to the given number of decimals. + + Parameters + ---------- + decimals : int, default 0 + Number of decimal places to round to. If decimals is negative, it specifies the number of positions to the left of the decimal point. + *args, **kwargs + Additional arguments and keywords have no effect but might be accepted for compatibility with NumPy. 
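# --- Editor's illustration, not part of the patch -----------------------------------
# The rename override above keeps the two pandas cases separate: a dict or callable
# relabels the index, while a scalar just becomes the Series name. Made-up sketch:
import pandas as pd

s = pd.Series([1, 2], index=["a", "b"], name="old")
assert list(s.rename({"a": "A"}).index) == ["A", "b"]   # mapping -> relabel the index
assert s.rename("new").name == "new"                    # scalar  -> set the name
assert s.name == "old"                                  # inplace=False leaves self alone
# -------------------------------------------------------------------------------------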
+ + Returns + ------- + Series + Rounded values of the Series. + + See Also + -------- + numpy.around : Round values of an np.array. + DataFrame.round : Round values of a DataFrame. + + Examples + -------- + >>> s = pd.Series([0.1, 1.3, 2.7]) + >>> s.round() + 0 0.0 + 1 1.0 + 2 3.0 + dtype: float64 + """ + return super().round(decimals, args=args, **kwargs) + + def rpow( + self, other, level=None, fill_value=None, axis=0 + ): # noqa: PR01, RT01, D200 + """ + Return exponential power of series and `other`, element-wise (binary operator `rpow`). + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return super().rpow(other, level=level, fill_value=fill_value, axis=axis) + + def rsub( + self, other, level=None, fill_value=None, axis=0 + ): # noqa: PR01, RT01, D200 + """ + Return subtraction of series and `other`, element-wise (binary operator `rsub`). + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return super().rsub(other, level=level, fill_value=fill_value, axis=axis) + + def rtruediv( + self, other, level=None, fill_value=None, axis=0 + ): # noqa: PR01, RT01, D200 + """ + Return floating division of series and `other`, element-wise (binary operator `rtruediv`). + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return super().rtruediv(other, level=level, fill_value=fill_value, axis=axis) + + rdiv = rtruediv + + def quantile( + self, + q: Scalar | ListLike = 0.5, + interpolation: Literal[ + "linear", "lower", "higher", "midpoint", "nearest" + ] = "linear", + ): + """ + Return value at the given quantile. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return super().quantile( + q=q, + axis=0, + numeric_only=False, + interpolation=interpolation, + method="single", + ) + + def reorder_levels(self, order): # noqa: PR01, RT01, D200 + """ + Rearrange index levels using input order. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + ErrorMessage.not_implemented() + return super().reorder_levels(order) + + def replace( + self, + to_replace=None, + value=no_default, + inplace=False, + limit=None, + regex=False, + method: str | NoDefault = no_default, + ): + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + inplace = validate_bool_kwarg(inplace, "inplace") + # The following errors cannot be raised by query compiler because we don't know + # if frontend object is Series or DataFrame. + if to_replace is not None and is_dict_like(value): + raise ValueError( + "In Series.replace 'to_replace' must be None if the 'value' is dict-like" + ) + if is_dict_like(to_replace) and value != no_default: + raise ValueError( + "In Series.replace 'to_replace' cannot be dict-like if 'value' is provided" + ) + new_query_compiler = self._query_compiler.replace( + to_replace=to_replace, + value=value, + limit=limit, + regex=regex, + method=method, + ) + return self._create_or_update_from_compiler(new_query_compiler, inplace) + + def searchsorted(self, value, side="left", sorter=None): # noqa: PR01, RT01, D200 + """ + Find indices where elements should be inserted to maintain order. 
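# --- Editor's illustration, not part of the patch -----------------------------------
# The replace override above re-raises the pandas argument conflicts (a dict-like
# `value` needs to_replace=None; a dict-like `to_replace` already carries the
# replacements) before delegating. The valid call shapes, with stock pandas:
import pandas as pd

s = pd.Series([1, 2, 3])
assert s.replace({1: 10, 3: 30}).tolist() == [10, 2, 30]   # dict-like to_replace
assert s.replace(2, 20).tolist() == [1, 20, 3]             # scalar pair
try:
    s.replace(1, {"a": 2})                                 # dict value + non-None to_replace
except ValueError:
    pass
# -------------------------------------------------------------------------------------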
+ """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + ErrorMessage.not_implemented() + searchsorted_qc = self._query_compiler + if sorter is not None: + # `iloc` method works slowly (https://github.com/modin-project/modin/issues/1903), + # so _default_to_pandas is used for now + # searchsorted_qc = self.iloc[sorter].reset_index(drop=True)._query_compiler + # sorter = None + return self._default_to_pandas( + pandas.Series.searchsorted, value, side=side, sorter=sorter + ) + # searchsorted should return item number irrespective of Series index, so + # Series.index is always set to pandas.RangeIndex, which can be easily processed + # on the query_compiler level + if not isinstance(searchsorted_qc.index, pandas.RangeIndex): + searchsorted_qc = searchsorted_qc.reset_index(drop=True) + + result = self.__constructor__( + query_compiler=searchsorted_qc.searchsorted( + value=value, side=side, sorter=sorter + ) + ).squeeze() + + # matching pandas output + if not is_scalar(value) and not is_list_like(result): + result = np.array([result]) + elif isinstance(result, type(self)): + result = result.to_numpy() + + return result + + def sort_values( + self, + axis: Axis = 0, + ascending: bool | int | Sequence[bool] | Sequence[int] = True, + inplace: bool = False, + kind: str = "quicksort", + na_position: str = "last", + ignore_index: bool = False, + key: IndexKeyFunc | None = None, + ): + """ + Sort by the values. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + from snowflake.snowpark.modin.pandas.dataframe import DataFrame + + if is_list_like(ascending) and len(ascending) != 1: + raise ValueError( + f"Length of ascending ({len(ascending)}) must be 1 for Series" + ) + + if axis is not None: + # Validate `axis` + self._get_axis_number(axis) + + # When we convert to a DataFrame, the name is automatically converted to 0 if it + # is None, so we do this to avoid a KeyError. + by = self.name if self.name is not None else 0 + result = ( + DataFrame(self.copy()) + .sort_values( + by=by, + ascending=ascending, + inplace=False, + kind=kind, + na_position=na_position, + ignore_index=ignore_index, + key=key, + ) + .squeeze(axis=1) + ) + result.name = self.name + return self._create_or_update_from_compiler( + result._query_compiler, inplace=inplace + ) + + sparse = CachedAccessor("sparse", SparseAccessor) + + def squeeze(self, axis: Axis | None = None): + """ + Squeeze 1 dimensional axis objects into scalars. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + if axis is not None: + # Validate `axis` + pandas.Series._get_axis_number(axis) + if len(self) == 1: + return self._reduce_dimension(self._query_compiler) + else: + return self.copy() + + def sub(self, other, level=None, fill_value=None, axis=0): # noqa: PR01, RT01, D200 + """ + Return subtraction of Series and `other`, element-wise (binary operator `sub`). + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return super().sub(other, level=level, fill_value=fill_value, axis=axis) + + subtract = sub + + def swaplevel(self, i=-2, j=-1, copy=True): # noqa: PR01, RT01, D200 + """ + Swap levels `i` and `j` in a `MultiIndex`. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + ErrorMessage.not_implemented() + return self._default_to_pandas("swaplevel", i=i, j=j, copy=copy) + + def take( + self, + indices: list | AnyArrayLike, + axis: Axis = 0, + **kwargs, + ): + """ + Return the elements in the given positional indices along an axis. 
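# --- Editor's illustration, not part of the patch -----------------------------------
# sort_values above wraps the Series in a one-column DataFrame, sorts by that column
# (the name, or 0 when unnamed), and squeezes back. The observable result should match
# the stock-pandas behavior sketched here on made-up data:
import pandas as pd

s = pd.Series([3, 1, None, 2], name="v")
out = s.sort_values(na_position="last")
assert out.dropna().tolist() == [1.0, 2.0, 3.0]
assert pd.isna(out.iloc[-1])             # missing values pushed to the end
# -------------------------------------------------------------------------------------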
+ """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return super().take(indices, axis=0, **kwargs) + + def to_dict(self, into: type[dict] = dict) -> dict: + """ + Convert Series to {label -> value} dict or dict-like object. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return self._to_pandas().to_dict(into=into) + + def to_frame( + self, name: Hashable = no_default + ) -> DataFrame: # noqa: PR01, RT01, D200 + """ + Convert Series to {label -> value} dict or dict-like object. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + from snowflake.snowpark.modin.pandas.dataframe import DataFrame + + if name is None: + name = no_default + + self_cp = self.copy() + if name is not no_default: + self_cp.name = name + + return DataFrame(self_cp) + + def to_list(self) -> list: + """ + Return a list of the values. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return self.values.tolist() + + def to_numpy( + self, + dtype: npt.DTypeLike | None = None, + copy: bool = False, + na_value: object = no_default, + **kwargs: Any, + ) -> np.ndarray: + """ + Return the NumPy ndarray representing the values in this Series or Index. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return ( + super() + .to_numpy( + dtype=dtype, + copy=copy, + na_value=na_value, + **kwargs, + ) + .flatten() + ) + + tolist = to_list + + # TODO(williamma12): When we implement to_timestamp, have this call the version + # in base.py + def to_period(self, freq=None, copy=True): # noqa: PR01, RT01, D200 + """ + Cast to PeriodArray/Index at a particular frequency. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + ErrorMessage.not_implemented() + return self._default_to_pandas("to_period", freq=freq, copy=copy) + + def to_string( + self, + buf=None, + na_rep="NaN", + float_format=None, + header=True, + index=True, + length=False, + dtype=False, + name=False, + max_rows=None, + min_rows=None, + ): # noqa: PR01, RT01, D200 + """ + Render a string representation of the Series. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + ErrorMessage.not_implemented() + return self._default_to_pandas( + pandas.Series.to_string, + buf=buf, + na_rep=na_rep, + float_format=float_format, + header=header, + index=index, + length=length, + dtype=dtype, + name=name, + max_rows=max_rows, + ) + + # TODO(williamma12): When we implement to_timestamp, have this call the version + # in base.py + def to_timestamp(self, freq=None, how="start", copy=True): # noqa: PR01, RT01, D200 + """ + Cast to DatetimeIndex of Timestamps, at beginning of period. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + ErrorMessage.not_implemented() + return self._default_to_pandas("to_timestamp", freq=freq, how=how, copy=copy) + + def transpose(self, *args, **kwargs): # noqa: PR01, RT01, D200 + """ + Return the transpose, which is by definition `self`. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return self + + T = property(transpose) + + def truediv( + self, other, level=None, fill_value=None, axis=0 + ): # noqa: PR01, RT01, D200 + """ + Return floating division of series and `other`, element-wise (binary operator `truediv`). 
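# --- Editor's illustration, not part of the patch -----------------------------------
# to_frame above wraps the Series in a single-column DataFrame (using `name` for the
# column when given), while to_dict produces a {label -> value} mapping. Stock-pandas
# sketch of both on made-up data:
import pandas as pd

s = pd.Series([1, 2, 3], name="x")
assert list(s.to_frame().columns) == ["x"]            # keeps the Series name
assert list(s.to_frame(name="y").columns) == ["y"]    # explicit column name wins
assert s.to_dict() == {0: 1, 1: 2, 2: 3}
# -------------------------------------------------------------------------------------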
+ """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return super().truediv(other, level=level, fill_value=fill_value, axis=axis) + + div = divide = truediv + + def truncate( + self, before=None, after=None, axis=None, copy=True + ): # noqa: PR01, RT01, D200 + """ + Truncate a Series before and after some index value. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + ErrorMessage.not_implemented() + return self._default_to_pandas( + pandas.Series.truncate, before=before, after=after, axis=axis, copy=copy + ) + + def unique(self): + """ + Return unique values of Series object. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return self.__constructor__( + query_compiler=self._query_compiler.unique() + ).to_numpy() + + def update(self, other): # noqa: PR01, D200 + """ + Modify Series in place using values from passed Series. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + if not isinstance(other, Series): + other = self.__constructor__(other) + query_compiler = self._query_compiler.series_update(other) + self._update_inplace(new_query_compiler=query_compiler) + + def value_counts( + self, + normalize: bool = False, + sort: bool = True, + ascending: bool = False, + bins: int | None = None, + dropna: bool = True, + ): + """ + Return a Series containing counts of unique values. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return self.__constructor__( + query_compiler=self._query_compiler.value_counts( + subset=None, + normalize=normalize, + sort=sort, + ascending=ascending, + bins=bins, + dropna=dropna, + ).set_index_names([self.name]), + name="proportion" if normalize else "count", + ) + + def view(self, dtype=None): # noqa: PR01, RT01, D200 + """ + Create a new view of the Series. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + ErrorMessage.not_implemented() + return self.__constructor__( + query_compiler=self._query_compiler.series_view(dtype=dtype) + ) + + def where( + self, + cond: DataFrame | Series | Callable | AnyArrayLike, + other: DataFrame | Series | Callable | Scalar | None = np.nan, + inplace: bool = False, + axis: Axis | None = None, + level: Level | None = None, + ): + """ + Replace values where the condition is False. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return super().where( + cond, + other=other, + inplace=inplace, + axis=axis, + level=level, + ) + + def xs( + self, key, axis=0, level=None, drop_level=True + ): # pragma: no cover # noqa: PR01, D200 + """ + Return cross-section from the Series/DataFrame. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + ErrorMessage.not_implemented("") + + @property + def attrs(self): # noqa: RT01, D200 + """ + Return dictionary of global attributes of this dataset. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + + def attrs(df): + return df.attrs + + return self._default_to_pandas(attrs) + + @property + def array(self): # noqa: RT01, D200 + """ + Return the ExtensionArray of the data backing this Series or Index. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + ErrorMessage.not_implemented() + + def array(df): + return df.array + + return self._default_to_pandas(array) + + @property + def axes(self): # noqa: RT01, D200 + """ + Return a list of the row axis labels. 
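# --- Editor's illustration, not part of the patch -----------------------------------
# value_counts above names its result "proportion" when normalize=True and "count"
# otherwise, and labels the result's index with the Series name, following the
# pandas 2.x convention sketched here on made-up data:
import pandas as pd

s = pd.Series(["a", "b", "a", "a"], name="letter")
counts, props = s.value_counts(), s.value_counts(normalize=True)
assert counts.name == "count" and counts["a"] == 3
assert props.name == "proportion" and props["a"] == 0.75
assert counts.index.name == "letter"
# -------------------------------------------------------------------------------------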
+ """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return [self.index] + + @property + def cat(self): # noqa: RT01, D200 + """ + Accessor object for categorical properties of the Series values. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + from .series_utils import CategoryMethods + + return CategoryMethods(self) + + @property + def dt(self): # noqa: RT01, D200 + """ + Accessor object for datetimelike properties of the Series values. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + current_dtype = self.dtype + if not is_datetime64_any_dtype(current_dtype): + raise AttributeError("Can only use .dt accessor with datetimelike values") + + from .series_utils import DatetimeProperties + + return DatetimeProperties(self) + + @property + def dtype(self): # noqa: RT01, D200 + """ + Return the dtype object of the underlying data. + See :func:`DataFrame.dtypes` for exact behavior. + + Examples + -------- + >>> s = pd.Series([1, 2, 3]) + >>> s.dtype + dtype('int64') + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return self._query_compiler.dtypes.squeeze() + + dtypes = dtype + + @property + def empty(self) -> bool: + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return len(self) == 0 + + @property + def hasnans(self): # noqa: RT01, D200 + """ + Return True if Series has any nans. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return self.isna().sum() > 0 + + def isna(self): + """ + Detect missing values. + + Returns + ------- + The result of detecting missing values. + """ + return super().isna() + + def isnull(self): + """ + Detect missing values. + + Returns + ------- + The result of detecting missing values. + """ + return super().isnull() + + @property + def is_monotonic_increasing(self): # noqa: RT01, D200 + """ + Return True if values in the Series are monotonic_increasing. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return self._reduce_dimension(self._query_compiler.is_monotonic_increasing()) + + @property + def is_monotonic_decreasing(self): # noqa: RT01, D200 + """ + Return True if values in the Series are monotonic_decreasing. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return self._reduce_dimension(self._query_compiler.is_monotonic_decreasing()) + + @property + def is_unique(self): # noqa: RT01, D200 + """ + Return True if values in the Series are unique. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return self.nunique(dropna=False) == len(self) + + @property + def nbytes(self): # noqa: RT01, D200 + """ + Return the number of bytes in the underlying data. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + ErrorMessage.not_implemented() + return self.memory_usage(index=False) + + @property + def ndim(self) -> int: + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return 1 + + def nunique(self, dropna=True): # noqa: PR01, RT01, D200 + """ + Return number of unique elements in the object. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return super().nunique(dropna=dropna) + + @property + def shape( + self, + ) -> tuple(int,): + return (len(self),) + + def shift( + self, + periods: int = 1, + freq=None, + axis: Axis = 0, + fill_value: Hashable = no_default, + ): + """ + Shift index by desired number of periods with an optional time `freq`. 
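# --- Editor's illustration, not part of the patch -----------------------------------
# Several properties above are derived from cheaper primitives: hasnans from
# isna().sum() > 0 and is_unique from nunique(dropna=False) == len(self). The
# identities they rely on, checked with stock pandas on made-up data:
import numpy as np
import pandas as pd

s = pd.Series([1.0, np.nan, 2.0, 2.0])
assert s.hasnans and s.isna().sum() > 0
assert (not s.is_unique) and s.nunique(dropna=False) != len(s)
assert s.shape == (len(s),) and s.ndim == 1
# -------------------------------------------------------------------------------------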
+ """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + if axis == 1: + # pandas compatible error. + raise ValueError("No axis named 1 for object type Series") + + return super().shift(periods, freq, axis, fill_value) + + @property + def str(self): # noqa: RT01, D200 + """ + Vectorized string functions for Series and Index. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + current_dtype = self.dtype + if not is_string_dtype(current_dtype): + raise AttributeError("Can only use .str accessor with string values!") + + from .series_utils import StringMethods + + return StringMethods(self) + + def _to_pandas( + self, + *, + statement_params: dict[str, str] | None = None, + **kwargs: Any, + ): + """ + Convert Snowpark pandas Series to pandas Series + + Args: + statement_params: Dictionary of statement level parameters to be set while executing this action. + + Returns: + pandas series + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + df = self._query_compiler.to_pandas(statement_params=statement_params, **kwargs) + if len(df.columns) == 0: + return pandas.Series([]) + series = df[df.columns[0]] + # special case when series is wrapped as dataframe, but has not label. + # This is indicated with MODIN_UNNAMED_SERIES_LABEL + if self._query_compiler.columns[0] == MODIN_UNNAMED_SERIES_LABEL: + series.name = None + + return series + + def _to_datetime(self, **kwargs): + """ + Convert `self` to datetime. + + Parameters + ---------- + **kwargs : dict + Optional arguments to use during query compiler's + `to_datetime` invocation. + + Returns + ------- + datetime + Series of datetime64 dtype. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return self.__constructor__( + query_compiler=self._query_compiler.series_to_datetime(**kwargs) + ) + + def _to_numeric(self, **kwargs: Any) -> Series: + """ + Convert `self` to numeric. + + Parameters + ---------- + **kwargs : dict + Optional arguments to use during query compiler's + `to_numeric` invocation. + + Returns + ------- + numeric + Series of numeric dtype. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return self.__constructor__( + query_compiler=self._query_compiler.to_numeric(**kwargs) + ) + + def _qcut( + self, + q: int | ListLike, + retbins: bool = False, + duplicates: Literal["raise", "drop"] = "raise", + ) -> Series: + """ + Quantile-based discretization function. + + See SnowflakeQueryCompiler.qcut for details. + + """ + + return self.__constructor__( + query_compiler=self._query_compiler.qcut(q, retbins, duplicates) + ) + + def _reduce_dimension(self, query_compiler): + """ + Try to reduce the dimension of data from the `query_compiler`. + + Parameters + ---------- + query_compiler : BaseQueryCompiler + Query compiler to retrieve the data. + + Returns + ------- + pandas.Series or pandas.DataFrame. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return query_compiler.to_pandas().squeeze() + + def _validate_dtypes_sum_prod_mean(self, axis, numeric_only, ignore_axis=False): + """ + Validate data dtype for `sum`, `prod` and `mean` methods. + + Parameters + ---------- + axis : {0, 1} + Axis to validate over. + numeric_only : bool + Whether or not to allow only numeric data. + If True and non-numeric data is found, exception + will be raised. + ignore_axis : bool, default: False + Whether or not to ignore `axis` parameter. 
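# --- Editor's illustration, not part of the patch -----------------------------------
# shift above only rejects axis=1 (a Series has no column axis) and defers the rest to
# the shared implementation. The semantics being deferred to, with stock pandas:
import pandas as pd

s = pd.Series([1, 2, 3])
assert s.shift(1, fill_value=0).tolist() == [0, 1, 2]
try:
    s.shift(1, axis=1)
except ValueError:
    pass        # "No axis named 1 for object type Series"
# -------------------------------------------------------------------------------------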
+ + Returns + ------- + Series + + Notes + ----- + Actually returns unmodified `self` object, + added for compatibility with Modin DataFrame. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return self + + def _validate_dtypes(self, numeric_only=False): + """ + Check that all the dtypes are the same. + + Parameters + ---------- + numeric_only : bool, default: False + Whether or not to allow only numeric data. + If True and non-numeric data is found, exception + will be raised. + + Notes + ----- + Actually does nothing, added for compatibility with Modin DataFrame. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + pass + + def _get_numeric_data(self, axis: int): + """ + Grab only numeric data from Series. + + Parameters + ---------- + axis : {0, 1} + Axis to inspect on having numeric types only. + + Returns + ------- + Series + + Notes + ----- + `numeric_only` parameter is not supported by Series, so this method + does not do anything. The method is added for compatibility with Modin DataFrame. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return self + + def _update_inplace(self, new_query_compiler): + """ + Update the current Series in-place using `new_query_compiler`. + + Parameters + ---------- + new_query_compiler : BaseQueryCompiler + QueryCompiler to use to manage the data. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + super()._update_inplace(new_query_compiler=new_query_compiler) + # Propagate changes back to parent so that column in dataframe had the same contents + if self._parent is not None: + if self._parent_axis == 0: + self._parent.loc[self.name] = self + else: + self._parent[self.name] = self + + def _create_or_update_from_compiler(self, new_query_compiler, inplace=False): + """ + Return or update a Series with given `new_query_compiler`. + + Parameters + ---------- + new_query_compiler : PandasQueryCompiler + QueryCompiler to use to manage the data. + inplace : bool, default: False + Whether or not to perform update or creation inplace. + + Returns + ------- + Series, DataFrame or None + None if update was done, Series or DataFrame otherwise. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + assert ( + isinstance(new_query_compiler, type(self._query_compiler)) + or type(new_query_compiler) in self._query_compiler.__class__.__bases__ + ), f"Invalid Query Compiler object: {type(new_query_compiler)}" + if not inplace and new_query_compiler.is_series_like(): + return self.__constructor__(query_compiler=new_query_compiler) + elif not inplace: + # This can happen with things like `reset_index` where we can add columns. + from snowflake.snowpark.modin.pandas.dataframe import DataFrame + + return DataFrame(query_compiler=new_query_compiler) + else: + self._update_inplace(new_query_compiler=new_query_compiler) + + def _repartition(self): + """ + Repartitioning Series to get ideal partitions inside. + + Allows to improve performance where the query compiler can't improve + yet by doing implicit repartitioning. + + Returns + ------- + Series + The repartitioned Series. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return super()._repartition(axis=0) + + # Persistance support methods - BEGIN + @classmethod + def _inflate_light(cls, query_compiler, name): + """ + Re-creates the object from previously-serialized lightweight representation. + + The method is used for faster but not disk-storable persistence. 
+ + Parameters + ---------- + query_compiler : BaseQueryCompiler + Query compiler to use for object re-creation. + name : str + The name to give to the new object. + + Returns + ------- + Series + New Series based on the `query_compiler`. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return cls(query_compiler=query_compiler, name=name) + + @classmethod + def _inflate_full(cls, pandas_series): + """ + Re-creates the object from previously-serialized disk-storable representation. + + Parameters + ---------- + pandas_series : pandas.Series + Data to use for object re-creation. + + Returns + ------- + Series + New Series based on the `pandas_series`. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return cls(data=pandas_series) + + def __reduce__(self): + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + ErrorMessage.not_implemented() # pragma: no cover + + self._query_compiler.finalize() + # if PersistentPickle.get(): + # return self._inflate_full, (self._to_pandas(),) + return self._inflate_light, (self._query_compiler, self.name) + + # Persistance support methods - END diff --git a/src/snowflake/snowpark/modin/pandas/series_utils.py b/src/snowflake/snowpark/modin/pandas/series_utils.py new file mode 100644 index 00000000000..0a513955ed0 --- /dev/null +++ b/src/snowflake/snowpark/modin/pandas/series_utils.py @@ -0,0 +1,1517 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +# Code in this file may constitute partial or total reimplementation, or modification of +# existing code originally distributed by the Modin project, under the Apache License, +# Version 2.0. + +""" +Implement Series's accessors public API as pandas does. + +Accessors: `Series.cat`, `Series.str`, `Series.dt` +""" +import re +import sys +from typing import TYPE_CHECKING, Callable, Optional, Union + +import numpy as np +import pandas + +from snowflake.snowpark.modin.pandas import DataFrame, Series +from snowflake.snowpark.modin.utils import _inherit_docstrings + +if sys.version_info[0] == 3 and sys.version_info[1] >= 7: + # Python >= 3.7 + from re import Pattern as _pattern_type +else: + # Python <= 3.6 + from re import _pattern_type + +if TYPE_CHECKING: + from datetime import tzinfo + + from pandas._typing import npt + +# add this line to enable doc tests to run +from snowflake.snowpark.modin import pandas as pd # noqa: F401 +from snowflake.snowpark.modin.plugin.utils.error_message import ErrorMessage + + +@_inherit_docstrings(pandas.core.arrays.categorical.CategoricalAccessor) +class CategoryMethods: + # CategoricalDType is not supported with Snowpark pandas API. Mark all methods + # to be unsupported. 
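# --- Editor's illustration, not part of the patch -----------------------------------
# __reduce__ above follows the standard pickle protocol: return a callable plus the
# argument tuple needed to rebuild the object (the "light" path re-creates the Series
# from its query compiler and name). A self-contained sketch of that protocol using
# hypothetical names, unrelated to the Snowpark classes:
import pickle


class Handle:
    def __init__(self, payload, name):
        self.payload, self.name = payload, name

    def __reduce__(self):                   # (callable, args) tells pickle how to rebuild
        return _rebuild_handle, (self.payload, self.name)


def _rebuild_handle(payload, name):         # plays the role of _inflate_light
    return Handle(payload, name)


restored = pickle.loads(pickle.dumps(Handle([1, 2, 3], "demo")))
assert (restored.payload, restored.name) == ([1, 2, 3], "demo")
# -------------------------------------------------------------------------------------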
+ category_not_supported_message = "CategoricalDType and corresponding methods is not available in Snowpark pandas API yet!" + + def __init__(self, series) -> None: + self._series = series + self._query_compiler = series._query_compiler + + @property + def categories(self): + ErrorMessage.not_implemented(self.category_not_supported_message) + + @categories.setter + def categories(self, categories): + ErrorMessage.not_implemented( + self.category_not_supported_message + ) # pragma: no cover + + @property + def ordered(self): + ErrorMessage.not_implemented(self.category_not_supported_message) + + @property + def codes(self): + ErrorMessage.not_implemented(self.category_not_supported_message) + + def rename_categories(self, new_categories, inplace=False): + ErrorMessage.not_implemented(self.category_not_supported_message) + + def reorder_categories(self, new_categories, ordered=None, inplace=False): + ErrorMessage.not_implemented(self.category_not_supported_message) + + def add_categories(self, new_categories, inplace=False): + ErrorMessage.not_implemented(self.category_not_supported_message) + + def remove_categories(self, removals, inplace=False): + ErrorMessage.not_implemented(self.category_not_supported_message) + + def remove_unused_categories(self, inplace=False): + ErrorMessage.not_implemented(self.category_not_supported_message) + + def set_categories(self, new_categories, ordered=None, rename=False, inplace=False): + ErrorMessage.not_implemented(self.category_not_supported_message) + + def as_ordered(self, inplace=False): + ErrorMessage.not_implemented(self.category_not_supported_message) + + def as_unordered(self, inplace=False): + ErrorMessage.not_implemented(self.category_not_supported_message) + + +@_inherit_docstrings(pandas.core.strings.accessor.StringMethods) +class StringMethods: + def __init__(self, series) -> None: + # Check if dtypes is objects + + self._series = series + self._query_compiler = series._query_compiler + + def casefold(self): + return Series(query_compiler=self._query_compiler.str_casefold()) + + def cat(self, others=None, sep=None, na_rep=None, join=None): + compiler_result = self._query_compiler.str_cat( + others=others, sep=sep, na_rep=na_rep, join=join + ) + # if others is None, result is a string. otherwise, it's a series. + return ( + compiler_result.to_pandas().squeeze() + if others is None + else Series(query_compiler=compiler_result) + ) + + def decode(self, encoding, errors="strict"): + return Series( + query_compiler=self._query_compiler.str_decode(encoding, errors=errors) + ) + + def split( + self, + pat: Optional[str] = None, + n: int = -1, + expand: bool = False, + regex: Optional[bool] = None, + ) -> Series: + """ + Split strings around given separator/delimiter. + + Splits the string in the Series/Index from the beginning, at the specified delimiter string. + + Parameters + ---------- + pat : str, optional + String to split on. If not specified, split on whitespace. + n : int, default -1 (all) + Limit number of splits in output. None, 0 and -1 will be interpreted as return all splits. + expand : bool, default False (Not implemented yet, should be set to False) + Expand the split strings into separate columns. + - If True, return DataFrame/MultiIndex expanding dimensionality. + - If False, return Series/Index, containing lists of strings. 
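# --- Editor's illustration, not part of the patch -----------------------------------
# StringMethods.cat above returns a plain string when `others` is None (the whole column
# is concatenated) and a Series otherwise (element-wise concatenation), the same split
# in return type as stock pandas, shown here on made-up data:
import pandas as pd

s = pd.Series(["a", "b", "c"])
assert s.str.cat(sep="-") == "a-b-c"                                  # others=None -> str
assert s.str.cat(["1", "2", "3"], sep="_").tolist() == ["a_1", "b_2", "c_3"]
# -------------------------------------------------------------------------------------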
+ regex : bool, default None (Not implemented yet, should be set to False or None) + Determines if the passed-in pattern is a regular expression: + - If True, assumes the passed-in pattern is a regular expression + - If False or None, treats the pattern as a literal string. + + Returns + ------- + Series, Index, DataFrame or MultiIndex + Type matches caller unless expand=True (see Notes). + + See also + -------- + Series.str.split + Split strings around given separator/delimiter. + Series.str.rsplit + Splits string around given separator/delimiter, starting from the right. + Series.str.join + Join lists contained as elements in the Series/Index with passed delimiter. + str.split + Standard library version for split. + str.rsplit + Standard library version for rsplit. + + Notes + ----- + The handling of the n keyword depends on the number of found splits: + + - If found splits > n, make first n splits only + - If found splits <= n, make all splits + - If for a certain row the number of found splits < n, append None for padding up to n if expand=True + - If using expand=True, Series and Index callers return DataFrame and MultiIndex objects, respectively. + + Examples + -------- + >>> s = pd.Series( + [ + "this is a regular sentence", + "https://docs.python.org/3/tutorial/index.html", + np.nan + ] + ) + s + 0 this is a regular sentence + 1 https://docs.python.org/3/tutorial/index.html + 2 NaN + dtype: object + + In the default setting, the string is split by whitespace. + + >>> s.str.split() + 0 [this, is, a, regular, sentence] + 1 [https://docs.python.org/3/tutorial/index.html] + 2 NaN + dtype: object + + The n parameter can be used to limit the number of splits on the delimiter. + + >>> s.str.split(n=2) + 0 [this, is, a regular sentence] + 1 [https://docs.python.org/3/tutorial/index.html] + 2 NaN + dtype: object + + The pat parameter can be used to split by other characters. + + >>> s.str.split(pat="/") + 0 [this is a regular sentence] + 1 [https:, , docs.python.org, 3, tutorial, index... + 2 NaN + dtype: object + """ + if not pat and pat is not None: + raise ValueError("split() requires a non-empty pattern match.") + + else: + return Series( + query_compiler=self._query_compiler.str_split( + pat=pat, n=n, expand=expand, regex=regex + ) + ) + + def rsplit(self, pat=None, n=-1, expand=False): + if not pat and pat is not None: + raise ValueError("rsplit() requires a non-empty pattern match.") + + else: + return Series( + query_compiler=self._query_compiler.str_rsplit( + pat=pat, n=n, expand=expand + ) + ) + + def get(self, i): + return Series(query_compiler=self._query_compiler.str_get(i)) + + def join(self, sep): + if sep is None: + raise AttributeError("'NoneType' object has no attribute 'join'") + return Series(query_compiler=self._query_compiler.str_join(sep)) + + def get_dummies(self, sep="|"): + return DataFrame(query_compiler=self._query_compiler.str_get_dummies(sep)) + + def contains( + self, + pat: str, + case: bool = True, + flags: int = 0, + na: object = None, + regex: bool = True, + ): + """ + Test if pattern or regex is contained within a string of a Series or Index. + + Return boolean Series or Index based on whether a given pattern or regex is contained within a string of a Series or Index. + + Parameters + ---------- + pat : str + Character sequence or regular expression. + case : bool, default True + If True, case sensitive. + flags : int, default 0 (no flags) + Flags to pass through to the re module, e.g. re.IGNORECASE. 
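# --- Editor's illustration, not part of the patch -----------------------------------
# str.get_dummies above expands delimiter-separated labels into one indicator column per
# distinct label. The stock-pandas behavior it mirrors, on made-up data:
import pandas as pd

s = pd.Series(["a|b", "a", "b|c"])
dummies = s.str.get_dummies(sep="|")
assert list(dummies.columns) == ["a", "b", "c"]
assert dummies["b"].tolist() == [1, 0, 1]
# -------------------------------------------------------------------------------------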
+ na : scalar, optional + Fill value for missing values. The default depends on dtype of the array. For object-dtype, numpy.nan is used. For StringDtype, pandas.NA is used. + regex : bool, default True + If True, assumes the pat is a regular expression. + If False, treats the pat as a literal string. + + Returns + ------- + Series or Index of boolean values + A Series or Index of boolean values indicating whether the given pattern is contained within the string of each element of the Series or Index. + + See also + -------- + match + Analogous, but stricter, relying on re.match instead of re.search. + Series.str.startswith + Test if the start of each string element matches a pattern. + Series.str.endswith + Same as startswith, but tests the end of string. + + Examples + -------- + Returning a Series of booleans using only a literal pattern. + + >>> s1 = pd.Series(['Mouse', 'dog', 'house and parrot', '23', np.NaN]) + >>> s1.str.contains('og', regex=False) + 0 False + 1 True + 2 False + 3 False + 4 NaN + dtype: object + + Returning an Index of booleans using only a literal pattern. + + >>> ind = pd.Index(['Mouse', 'dog', 'house and parrot', '23.0', np.NaN]) + >>> ind.str.contains('23', regex=False) + Index([False, False, False, True, nan], dtype='object') + + Specifying case sensitivity using case. + + >>> s1.str.contains('oG', case=True, regex=True) + 0 False + 1 False + 2 False + 3 False + 4 NaN + dtype: object + + Specifying na to be False instead of NaN replaces NaN values with False. If Series or Index does not contain NaN values the resultant dtype will be bool, otherwise, an object dtype. + + >>> s1.str.contains('og', na=False, regex=True) + 0 False + 1 True + 2 False + 3 False + 4 False + dtype: bool + + Returning ‘house’ or ‘dog’ when either expression occurs in a string. + + >>> s1.str.contains('house|dog', regex=True) + 0 False + 1 True + 2 True + 3 False + 4 NaN + dtype: object + + Ignoring case sensitivity using flags with regex. + + >>> import re + >>> s1.str.contains('PARROT', flags=re.IGNORECASE, regex=True) + 0 False + 1 False + 2 True + 3 False + 4 NaN + dtype: object + + Returning any digit using regular expression. + + >>> s1.str.contains('\\d', regex=True) + 0 False + 1 False + 2 False + 3 True + 4 NaN + dtype: object + + Ensure pat is a not a literal pattern when regex is set to True. Note in the following example one might expect only s2[1] and s2[3] to return True. However, ‘.0’ as a regex matches any character followed by a 0. + + >>> s2 = pd.Series(['40', '40.0', '41', '41.0', '35']) + >>> s2.str.contains('.0', regex=True) + 0 True + 1 True + 2 False + 3 True + 4 False + dtype: bool + """ + return Series( + query_compiler=self._query_compiler.str_contains( + pat, case=case, flags=flags, na=na, regex=regex + ) + ) + + def replace( + self, + pat: str, + repl: Union[str, Callable], + n: int = -1, + case: Optional[bool] = None, + flags: int = 0, + regex: bool = True, + ) -> Series: + r""" + Replace each occurrence of pattern/regex in the Series/Index. + + Equivalent to str.replace() or re.sub(), depending on the regex value. + + Parameters + ---------- + pat : str + String can be a character sequence or regular expression. + repl : str or callable + Replacement string or a callable. The callable is passed the regex match object and must return a replacement string to be used. See re.sub(). + n : int, default -1 (all) + Number of replacements to make from start. 
+ case : bool, default None
+ Determines if replace is case sensitive:
+ - If True, case sensitive (the default if pat is a string)
+ - Set to False for case insensitive
+ - Cannot be set if pat is a compiled regex.
+ flags : int, default 0 (no flags)
+ Regex module flags, e.g. re.IGNORECASE. Cannot be set if pat is a compiled regex.
+ regex : bool, default True
+ Determines if the passed-in pattern is a regular expression:
+ - If True, assumes the passed-in pattern is a regular expression.
+ - If False, treats the pattern as a literal string.
+ - Cannot be set to False if pat is a compiled regex or repl is a callable.
+
+ Returns
+ -------
+ Series or Index of object
+ A copy of the object with all matching occurrences of pat replaced by repl.
+
+ Raises
+ ------
+ ValueError
+ - if regex is False and repl is a callable or pat is a compiled regex
+ - if pat is a compiled regex and case or flags is set
+
+ Notes
+ -----
+ When pat is a compiled regex, all flags should be included in the compiled regex. Use of case, flags, or regex=False with a compiled regex will raise an error.
+
+ Examples
+ --------
+ When pat is a string and regex is True, the given pat is compiled as a regex. When repl is a string, it replaces matching regex patterns as with re.sub(). NaN value(s) in the Series are left as is:
+
+ >>> pd.Series(['foo', 'fuz', np.nan]).str.replace('f.', 'ba', regex=True)
+ 0 bao
+ 1 baz
+ 2 NaN
+ dtype: object
+
+ When pat is a string and regex is False, every pat is replaced with repl as with str.replace():
+
+ >>> pd.Series(['f.o', 'fuz', np.nan]).str.replace('f.', 'ba', regex=False)
+ 0 bao
+ 1 fuz
+ 2 NaN
+ dtype: object
+
+ When repl is a callable, it is called on every pat using re.sub(). The callable should expect one positional argument (a regex object) and return a string.
+
+ To get the idea:
+
+ >>> pd.Series(['foo', 'fuz', np.nan]).str.replace('f', repr, regex=True)
+ 0 <re.Match object; span=(0, 1), match='f'>oo
+ 1 <re.Match object; span=(0, 1), match='f'>uz
+ 2 NaN
+ dtype: object
+
+ Reverse every lowercase alphabetic word:
+
+ >>> repl = lambda m: m.group(0)[::-1]
+ >>> ser = pd.Series(['foo 123', 'bar baz', np.nan])
+ >>> ser.str.replace(r'[a-z]+', repl, regex=True)
+ 0 oof 123
+ 1 rab zab
+ 2 NaN
+ dtype: object
+
+ Using regex groups (extract second group and swap case):
+
+ >>> pat = r"(?P<one>\w+) (?P<two>\w+) (?P<three>\w+)"
+ >>> repl = lambda m: m.group('two').swapcase()
+ >>> ser = pd.Series(['One Two Three', 'Foo Bar Baz'])
+ >>> ser.str.replace(pat, repl, regex=True)
+ 0 tWO
+ 1 bAR
+ dtype: object
+
+ Using a compiled regex with flags
+
+ >>> import re
+ >>> regex_pat = re.compile(r'FUZ', flags=re.IGNORECASE)
+ >>> pd.Series(['foo', 'fuz', np.nan]).str.replace(regex_pat, 'bar', regex=True)
+ 0 foo
+ 1 bar
+ 2 NaN
+ dtype: object
+ """
+ if not (isinstance(repl, str) or callable(repl)):
+ raise TypeError("repl must be a string or callable")
+ return Series(
+ query_compiler=self._query_compiler.str_replace(
+ pat, repl, n=n, case=case, flags=flags, regex=regex
+ )
+ )
+
+ def pad(self, width, side="left", fillchar=" "):
+ if len(fillchar) != 1:
+ raise TypeError("fillchar must be a character, not str")
+ return Series(
+ query_compiler=self._query_compiler.str_pad(
+ width, side=side, fillchar=fillchar
+ )
+ )
+
+ def center(self, width, fillchar=" "):
+ if len(fillchar) != 1:
+ raise TypeError("fillchar must be a character, not str")
+ return Series(
+ query_compiler=self._query_compiler.str_center(width, fillchar=fillchar)
+ )
+
+ def ljust(self, width, fillchar=" "):
+ if len(fillchar) != 1:
+ raise TypeError("fillchar must be a character, not str")
+ return Series(
+ query_compiler=self._query_compiler.str_ljust(width, fillchar=fillchar)
+ )
+
+ def rjust(self, width, fillchar=" "):
+ if len(fillchar) != 1:
+ raise TypeError("fillchar must be a character, not str")
+ return Series(
+ query_compiler=self._query_compiler.str_rjust(width, fillchar=fillchar)
+ )
+
+ def zfill(self, width):
+ return Series(query_compiler=self._query_compiler.str_zfill(width))
+
+ def wrap(self, width, **kwargs):
+ if width <= 0:
+ raise ValueError(f"invalid width {width} (must be > 0)")
+ return Series(query_compiler=self._query_compiler.str_wrap(width, **kwargs))
+
+ def slice(self, start=None, stop=None, step=None):
+ if step == 0:
+ raise ValueError("slice step cannot be zero")
+ return Series(
+ query_compiler=self._query_compiler.str_slice(
+ start=start, stop=stop, step=step
+ )
+ )
+
+ def slice_replace(self, start=None, stop=None, repl=None):
+ return Series(
+ query_compiler=self._query_compiler.str_slice_replace(
+ start=start, stop=stop, repl=repl
+ )
+ )
+
+ def count(self, pat: str, flags: int = 0, **kwargs):
+ """
+ Count occurrences of pattern in each string of the Series/Index.
+
+ This function is used to count the number of times a particular regex pattern is repeated in each of the string elements of the Series.
+
+ Parameters
+ ----------
+ pat : str
+ Valid regular expression.
+ flags : int, default 0, meaning no flags
+ Flags for the re module.
+ **kwargs
+ For compatibility with other string methods. Not used.
+
+ Returns
+ -------
+ Series or Index
+ Same type as the calling object containing the integer counts.
+
+ See also
+ --------
+ re
+ Standard library module for regular expressions.
+ str.count
+ Standard library version, without regular expression support.
+ + Notes + ----- + Some characters need to be escaped when passing in pat. eg. '$' has a special meaning in regex and must be escaped when finding this literal character. + + Examples + -------- + >>> s = pd.Series(['A', 'B', 'Aaba', 'Baca', np.nan, 'CABA', 'cat']) + >>> s.str.count('a') + 0 0.0 + 1 0.0 + 2 2.0 + 3 2.0 + 4 NaN + 5 0.0 + 6 1.0 + dtype: float64 + + Escape '$' to find the literal dollar sign. + + >>> s = pd.Series(['$', 'B', 'Aab$', '$$ca', 'C$B$', 'cat']) + >>> s.str.count('\\$') + 0 1 + 1 0 + 2 1 + 3 2 + 4 2 + 5 0 + dtype: int64 + + This is also available on Index + + >>> pd.Index(['A', 'A', 'Aaba', 'cat']).str.count('a') + Int64Index([0, 0, 2, 1], dtype='int64') + """ + if not isinstance(pat, (str, _pattern_type)): + raise TypeError("first argument must be string or compiled pattern") + return Series( + query_compiler=self._query_compiler.str_count(pat, flags=flags, **kwargs) + ) + + def startswith(self, pat, na=np.NaN): + """ + Test if the start of each string element matches a pattern. + + Parameters + ---------- + pat : str or tuple[str, ...] + Character sequence or tuple of strings. Regular expressions are not accepted. + na : object, default NaN + Object shown if element tested is not a string. The default depends on dtype of the array. For object-dtype, numpy.nan is used. For StringDtype, pandas.NA is used. + + Returns + ------- + Series or Index of bool + A Series of booleans indicating whether the given pattern matches the start of each string element. + + See also + -------- + str.startswith + Python standard library string method. + Series.str.endswith + Same as startswith, but tests the end of string. + Series.str.contains + Tests if string element contains a pattern. + + Examples + -------- + >>> s = pd.Series(['bat', 'Bear', 'cat', np.nan]) + >>> s + 0 bat + 1 Bear + 2 cat + 3 NaN + dtype: object + + >>> s.str.startswith('b') + 0 True + 1 False + 2 False + 3 NaN + dtype: object + + >>> s.str.startswith(('b', 'B')) + 0 True + 1 True + 2 False + 3 NaN + dtype: object + + Specifying na to be False instead of NaN. + + >>> s.str.startswith('b', na=False) + 0 True + 1 False + 2 False + 3 False + dtype: bool + """ + return Series(query_compiler=self._query_compiler.str_startswith(pat, na=na)) + + def encode(self, encoding, errors="strict"): + return Series( + query_compiler=self._query_compiler.str_encode(encoding, errors=errors) + ) + + def endswith(self, pat, na=np.NaN): + """ + Test if the end of each string element matches a pattern. + + Parameters + ---------- + pat : str or tuple[str, …] + Character sequence or tuple of strings. Regular expressions are not accepted. + na : object, default NaN + Object shown if element tested is not a string. The default depends on dtype of the array. For object-dtype, numpy.nan is used. For StringDtype, pandas.NA is used. + + Returns + ------- + Series or Index of bool + A Series of booleans indicating whether the given pattern matches the end of each string element. + + See also + -------- + str.endswith + Python standard library string method. + Series.str.startswith + Same as endswith, but tests the start of string. + Series.str.contains + Tests if string element contains a pattern. + + Examples + -------- + >>> s = pd.Series(['bat', 'bear', 'caT', np.nan]) + >>> s + 0 bat + 1 bear + 2 caT + 3 NaN + dtype: object + + >>> s.str.endswith('t') + 0 True + 1 False + 2 False + 3 NaN + dtype: object + + >>> s.str.endswith(('t', 'T')) + 0 True + 1 False + 2 True + 3 NaN + dtype: object + + Specifying na to be False instead of NaN. 
+ + >>> s.str.endswith('t', na=False) + 0 True + 1 False + 2 False + 3 False + dtype: bool + """ + return Series(query_compiler=self._query_compiler.str_endswith(pat, na=na)) + + def findall(self, pat, flags=0, **kwargs): + if not isinstance(pat, (str, _pattern_type)): + raise TypeError("first argument must be string or compiled pattern") + return Series( + query_compiler=self._query_compiler.str_findall(pat, flags=flags, **kwargs) + ) + + def match(self, pat, case=True, flags=0, na=np.NaN): + if not isinstance(pat, (str, _pattern_type)): + raise TypeError("first argument must be string or compiled pattern") + return Series( + query_compiler=self._query_compiler.str_match(pat, flags=flags, na=na) + ) + + def extract(self, pat, flags=0, expand=True): + query_compiler = self._query_compiler.str_extract( + pat, flags=flags, expand=expand + ) + return ( + DataFrame(query_compiler=query_compiler) + if expand or re.compile(pat).groups > 1 + else Series(query_compiler=query_compiler) + ) + + def extractall(self, pat, flags=0): + return Series(query_compiler=self._query_compiler.str_extractall(pat, flags)) + + def len(self): + """ + Get the length of a string. For non-string values this + returns the length of the string representation. + + Returns + ------- + Series + A Series with the length of each value + + Examples + -------- + >>> s = pd.Series(['dog', + ... '', + ... 5, + ... {'foo' : 'bar'}, + ... [2, 3, 5, 7], + ... ('one', 'two', 'three')]) + >>> s.str.len() + 0 3 + 1 0 + 2 1 + 3 13 + 4 9 + 5 21 + dtype: int64 + """ + return Series(query_compiler=self._query_compiler.str_len()) + + def strip(self, to_strip: str = None) -> Series: + """ + Remove leading and trailing characters. + + Strip whitespaces (including newlines) or a set of specified characters from each string in the Series/Index from left and right sides. Replaces any non-strings in Series with NaNs. Equivalent to str.strip(). + + Parameters + ---------- + to_strip : str or None, default None + Specifying the set of characters to be removed. All combinations of this set of characters will be stripped. If None then whitespaces are removed. + + Returns + ------- + Series or Index of object + + See also + -------- + Series.str.strip + Remove leading and trailing characters in Series/Index. + Series.str.lstrip + Remove leading characters in Series/Index. + Series.str.rstrip + Remove trailing characters in Series/Index. + + Examples + -------- + >>> s = pd.Series(['1. Ant. ', '2. Bee!\\n', '3. Cat?\\t', np.nan, 10, True]) + >>> s + 0 1. Ant. + 1 2. Bee!\\n + 2 3. Cat?\\t + 3 NaN + 4 10 + 5 True + dtype: object + + >>> s.str.strip() + 0 1. Ant. + 1 2. Bee! + 2 3. Cat? + 3 NaN + 4 NaN + 5 NaN + dtype: object + + >>> s.str.lstrip('123.') + 0 Ant. + 1 Bee!\\n + 2 Cat?\\t + 3 NaN + 4 NaN + 5 NaN + dtype: object + + >>> s.str.rstrip('.!? \\n\\t') + 0 1. Ant + 1 2. Bee + 2 3. Cat + 3 NaN + 4 NaN + 5 NaN + dtype: object + + >>> s.str.strip('123.!? 
\\n\\t') + 0 Ant + 1 Bee + 2 Cat + 3 NaN + 4 NaN + 5 NaN + dtype: object + """ + return Series(query_compiler=self._query_compiler.str_strip(to_strip=to_strip)) + + def rstrip(self, to_strip=None): + return Series(query_compiler=self._query_compiler.str_rstrip(to_strip=to_strip)) + + def lstrip(self, to_strip=None): + return Series(query_compiler=self._query_compiler.str_lstrip(to_strip=to_strip)) + + def partition(self, sep=" ", expand=True): + if sep is not None and len(sep) == 0: + raise ValueError("empty separator") + + return (DataFrame if expand else Series)( + query_compiler=self._query_compiler.str_partition(sep=sep, expand=expand) + ) + + def removeprefix(self, prefix): + return Series(query_compiler=self._query_compiler.str_removeprefix(prefix)) + + def removesuffix(self, suffix): + return Series(query_compiler=self._query_compiler.str_removesuffix(suffix)) + + def repeat(self, repeats): + return Series(query_compiler=self._query_compiler.str_repeat(repeats)) + + def rpartition(self, sep=" ", expand=True): + if sep is not None and len(sep) == 0: + raise ValueError("empty separator") + + else: + return Series( + query_compiler=self._query_compiler.str_rpartition( + sep=sep, expand=expand + ) + ) + + def lower(self): + return Series(query_compiler=self._query_compiler.str_lower()) + + def upper(self): + return Series(query_compiler=self._query_compiler.str_upper()) + + def title(self): + """ + Convert strings in the Series/Index to be titlecased . + + Returns + ------- + Series or Index of object + + See also + -------- + Series.str.lower + Converts all characters to lowercase. + + Series.str.upper + Converts all characters to uppercase. + + Series.str.title + Converts first character of each word to uppercase and remaining to lowercase. + + Series.str.capitalize + Converts first character to uppercase and remaining to lowercase. + + Series.str.swapcase + Converts uppercase to lowercase and lowercase to uppercase. + + Series.str.casefold + Removes all case distinctions in the string. + + Examples + -------- + >>> s = pd.Series(['lower', 'CAPITALS', 'this is a sentence', 'SwApCaSe']) + >>> s + 0 lower + 1 CAPITALS + 2 this is a sentence + 3 SwApCaSe + dtype: object + + >>> s.str.title() + 0 Lower + 1 Capitals + 2 This Is A Sentence + 3 Swapcase + dtype: object + """ + return Series(query_compiler=self._query_compiler.str_title()) + + def find(self, sub, start=0, end=None): + if not isinstance(sub, str): + raise TypeError(f"expected a string object, not {type(sub).__name__}") + return Series( + query_compiler=self._query_compiler.str_find(sub, start=start, end=end) + ) + + def rfind(self, sub, start=0, end=None): + if not isinstance(sub, str): + raise TypeError(f"expected a string object, not {type(sub).__name__}") + return Series( + query_compiler=self._query_compiler.str_rfind(sub, start=start, end=end) + ) + + def index(self, sub, start=0, end=None): + if not isinstance(sub, str): + raise TypeError(f"expected a string object, not {type(sub).__name__}") + return Series( + query_compiler=self._query_compiler.str_index(sub, start=start, end=end) + ) + + def rindex(self, sub, start=0, end=None): + if not isinstance(sub, str): + raise TypeError(f"expected a string object, not {type(sub).__name__}") + return Series( + query_compiler=self._query_compiler.str_rindex(sub, start=start, end=end) + ) + + def capitalize(self): + """ + Convert strings in the Series/Index to be capitalized. 
+ + Returns + ------- + Series or Index of object + + See also + -------- + Series.str.lower + Converts all characters to lowercase. + + Series.str.upper + Converts all characters to uppercase. + + Series.str.title + Converts first character of each word to uppercase and remaining to lowercase. + + Series.str.capitalize + Converts first character to uppercase and remaining to lowercase. + + Series.str.swapcase + Converts uppercase to lowercase and lowercase to uppercase. + + Series.str.casefold + Removes all case distinctions in the string. + + Examples + -------- + >>> s = pd.Series(['lower', 'CAPITALS', 'this is a sentence', 'SwApCaSe']) + >>> s + 0 lower + 1 CAPITALS + 2 this is a sentence + 3 SwApCaSe + dtype: object + + >>> s.str.capitalize() + 0 Lower + 1 Capitals + 2 This is a sentence + 3 Swapcase + dtype: object + """ + return Series(query_compiler=self._query_compiler.str_capitalize()) + + def swapcase(self): + return Series(query_compiler=self._query_compiler.str_swapcase()) + + def normalize(self, form): + return Series(query_compiler=self._query_compiler.str_normalize(form)) + + def translate(self, table): + return Series(query_compiler=self._query_compiler.str_translate(table)) + + def isalnum(self): + return Series(query_compiler=self._query_compiler.str_isalnum()) + + def isalpha(self): + return Series(query_compiler=self._query_compiler.str_isalpha()) + + def isdigit(self): + """ + Check whether all characters in each string are digits. + + This is equivalent to running the Python string method str.isdigit() for each element of the Series. If a string has zero characters, False is returned for that check. + + Returns + ------- + Series of boolean values with the same length as the original Series. + + Examples + -------- + >>> s = pd.Series(['23', '³', '⅕', '']) + + The `s.str.isdigit` method checks for characters used to form numbers in base 10. + Currently, special digits like superscripted and subscripted digits in unicode are + not checked for. + >>> s.str.isdigit() + 0 True + 1 False + 2 False + 3 False + dtype: bool + """ + return Series(query_compiler=self._query_compiler.str_isdigit()) + + def isspace(self): + return Series(query_compiler=self._query_compiler.str_isspace()) + + def islower(self): + """ + Check whether all characters in each string are lowercase. + + This is equivalent to running the Python string method str.islower() for each element of the Series. If a string has zero characters, False is returned for that check. + + Returns + ------- + Series of boolean values with the same length as the original Series. + + Examples + -------- + >>> s = pd.Series(['leopard', 'Golden Eagle', 'SNAKE', '']) + >>> s.str.islower() + 0 True + 1 False + 2 False + 3 False + dtype: bool + """ + return Series(query_compiler=self._query_compiler.str_islower()) + + def isupper(self): + """ + Check whether all characters in each string are uppercase. + + This is equivalent to running the Python string method str.isupper() for each element of the Series. If a string has zero characters, False is returned for that check. + + Returns + ------- + Series of boolean values with the same length as the original Series. + + Examples + -------- + >>> s = pd.Series(['leopard', 'Golden Eagle', 'SNAKE', '']) + >>> s.str.isupper() + 0 False + 1 False + 2 True + 3 False + dtype: bool + """ + return Series(query_compiler=self._query_compiler.str_isupper()) + + def istitle(self): + """ + Check whether all characters in each string are uppercase. 
+ + This is equivalent to running the Python string method str.isupper() for each element of the Series. If a string has zero characters, False is returned for that check. + + Returns + ------- + Series of boolean values with the same length as the original Series. + + Examples + -------- + >>> s = pd.Series(['leopard', 'Golden Eagle', 'SNAKE', '', 'Snake']) + >>> s.str.istitle() + 0 False + 1 True + 2 False + 3 False + 4 True + dtype: bool + """ + return Series(query_compiler=self._query_compiler.str_istitle()) + + def isnumeric(self): + return Series(query_compiler=self._query_compiler.str_isnumeric()) + + def isdecimal(self): + return Series(query_compiler=self._query_compiler.str_isdecimal()) + + +@_inherit_docstrings(pandas.core.indexes.accessors.CombinedDatetimelikeProperties) +class DatetimeProperties: + def __init__(self, series) -> None: + self._series = series + self._query_compiler = series._query_compiler + + @property + def date(self): + """ + Returns a series of python :class:`datetime.date` objects. + + Namely, the date part of Timestamps without time and timezone information. + + Examples + -------- + For Series: + + >>> s = pd.Series(["2020-01-01 01:23:00", "2020-02-01 12:11:05"]) + >>> s = pd.to_datetime(s) + >>> s + 0 2020-01-01 01:23:00 + 1 2020-02-01 12:11:05 + dtype: datetime64[ns] + >>> s.dt.date + 0 2020-01-01 + 1 2020-02-01 + dtype: object + """ + return Series(query_compiler=self._query_compiler.dt_property("date")) + + @property + def time(self): + return Series(query_compiler=self._query_compiler.dt_time()) + + @property + def timetz(self): + return Series(query_compiler=self._query_compiler.dt_timetz()) + + @property + def year(self): + """ + Returns a series of the years of the datetime. + + Examples + -------- + >>> datetime_series = pd.Series( + ... pd.date_range("2000-01-01", periods=3, freq="YE") + ... ) + >>> datetime_series + 0 2000-12-31 + 1 2001-12-31 + 2 2002-12-31 + dtype: datetime64[ns] + >>> datetime_series.dt.year + 0 2000 + 1 2001 + 2 2002 + dtype: int16 + """ + return Series(query_compiler=self._query_compiler.dt_property("year")) + + @property + def month(self): + """ + Returns a series of the months of the datetime. + + Examples + -------- + >>> datetime_series = pd.Series( + ... pd.date_range("2000-01-01", periods=3, freq="ME") + ... ) + >>> datetime_series + 0 2000-01-31 + 1 2000-02-29 + 2 2000-03-31 + dtype: datetime64[ns] + >>> datetime_series.dt.month + 0 1 + 1 2 + 2 3 + dtype: int8 + """ + return Series(query_compiler=self._query_compiler.dt_property("month")) + + @property + def day(self): + """ + Returns a series of the days of the datetime. + + Examples + -------- + >>> datetime_series = pd.Series( + ... pd.date_range("2000-01-01", periods=3, freq="D") + ... ) + >>> datetime_series + 0 2000-01-01 + 1 2000-01-02 + 2 2000-01-03 + dtype: datetime64[ns] + >>> datetime_series.dt.day + 0 1 + 1 2 + 2 3 + dtype: int8 + """ + return Series(query_compiler=self._query_compiler.dt_property("day")) + + @property + def hour(self): + """ + Returns a series of the hours of the datetime. + + Examples + -------- + >>> datetime_series = pd.Series( + ... pandas.date_range("2000-01-01", periods=3, freq="h") + ... 
) + >>> datetime_series + 0 2000-01-01 00:00:00 + 1 2000-01-01 01:00:00 + 2 2000-01-01 02:00:00 + dtype: datetime64[ns] + >>> datetime_series.dt.hour + 0 0 + 1 1 + 2 2 + dtype: int8 + """ + return Series(query_compiler=self._query_compiler.dt_property("hour")) + + @property + def minute(self): + """ + Returns a series of the minutes of the datetime. + + Examples + -------- + >>> datetime_series = pd.Series( + ... pd.date_range("2000-01-01", periods=3, freq="min") + ... ) + >>> datetime_series + 0 2000-01-01 00:00:00 + 1 2000-01-01 00:01:00 + 2 2000-01-01 00:02:00 + dtype: datetime64[ns] + >>> datetime_series.dt.minute + 0 0 + 1 1 + 2 2 + dtype: int8 + """ + return Series(query_compiler=self._query_compiler.dt_property("minute")) + + @property + def second(self): + """ + Returns a series of the seconds of the datetime. + + Examples + -------- + >>> datetime_series = pd.Series( + ... pd.date_range("2000-01-01", periods=3, freq="s") + ... ) + >>> datetime_series + 0 2000-01-01 00:00:00 + 1 2000-01-01 00:00:01 + 2 2000-01-01 00:00:02 + dtype: datetime64[ns] + >>> datetime_series.dt.second + 0 0 + 1 1 + 2 2 + dtype: int8 + """ + return Series(query_compiler=self._query_compiler.dt_property("second")) + + @property + def microsecond(self): + return Series(query_compiler=self._query_compiler.dt_microsecond()) + + @property + def nanosecond(self): + return Series(query_compiler=self._query_compiler.dt_nanosecond()) + + @property + def week(self): + return Series(query_compiler=self._query_compiler.dt_week()) + + @property + def weekofyear(self): + return Series(query_compiler=self._query_compiler.dt_weekofyear()) + + @property + def dayofweek(self): + return Series(query_compiler=self._query_compiler.dt_dayofweek()) + + @property + def weekday(self): + return Series(query_compiler=self._query_compiler.dt_weekday()) + + @property + def dayofyear(self): + return Series(query_compiler=self._query_compiler.dt_dayofyear()) + + @property + def quarter(self): + """ + Returns a series of the quarters of the datetime. + + Examples + -------- + >>> datetime_series = pd.Series( + ... pd.date_range("2000-01-01", periods=3, freq="3ME") + ... 
) + >>> datetime_series + 0 2000-01-31 + 1 2000-04-30 + 2 2000-07-31 + dtype: datetime64[ns] + >>> datetime_series.dt.quarter + 0 1 + 1 2 + 2 3 + dtype: int8 + """ + return Series(query_compiler=self._query_compiler.dt_property("quarter")) + + @property + def is_month_start(self): + return Series(query_compiler=self._query_compiler.dt_is_month_start()) + + @property + def is_month_end(self): + return Series(query_compiler=self._query_compiler.dt_is_month_end()) + + @property + def is_quarter_start(self): + return Series(query_compiler=self._query_compiler.dt_is_quarter_start()) + + @property + def is_quarter_end(self): + return Series(query_compiler=self._query_compiler.dt_is_quarter_end()) + + @property + def is_year_start(self): + return Series(query_compiler=self._query_compiler.dt_is_year_start()) + + @property + def is_year_end(self): + return Series(query_compiler=self._query_compiler.dt_is_year_end()) + + @property + def is_leap_year(self): + return Series(query_compiler=self._query_compiler.dt_is_leap_year()) + + @property + def daysinmonth(self): + return Series(query_compiler=self._query_compiler.dt_daysinmonth()) + + @property + def days_in_month(self): + return Series(query_compiler=self._query_compiler.dt_days_in_month()) + + @property + def tz(self) -> "tzinfo | None": + dtype = self._series.dtype + if isinstance(dtype, np.dtype): + return None + return dtype.tz + + @property + def freq(self): + return self._query_compiler.dt_freq().to_pandas().squeeze() + + def to_period(self, *args, **kwargs): + return Series(query_compiler=self._query_compiler.dt_to_period(*args, **kwargs)) + + def to_pydatetime(self): + return Series(query_compiler=self._query_compiler.dt_to_pydatetime()).to_numpy() + + def tz_localize(self, *args, **kwargs): + return Series( + query_compiler=self._query_compiler.dt_tz_localize(*args, **kwargs) + ) + + def tz_convert(self, *args, **kwargs): + return Series( + query_compiler=self._query_compiler.dt_tz_convert(*args, **kwargs) + ) + + def normalize(self, *args, **kwargs): + return Series(query_compiler=self._query_compiler.dt_normalize(*args, **kwargs)) + + def strftime(self, *args, **kwargs): + return Series(query_compiler=self._query_compiler.dt_strftime(*args, **kwargs)) + + def round(self, *args, **kwargs): + return Series(query_compiler=self._query_compiler.dt_round(*args, **kwargs)) + + def floor(self, *args, **kwargs): + return Series(query_compiler=self._query_compiler.dt_floor(*args, **kwargs)) + + def ceil(self, *args, **kwargs): + return Series(query_compiler=self._query_compiler.dt_ceil(*args, **kwargs)) + + def month_name(self, *args, **kwargs): + return Series( + query_compiler=self._query_compiler.dt_month_name(*args, **kwargs) + ) + + def day_name(self, *args, **kwargs): + return Series(query_compiler=self._query_compiler.dt_day_name(*args, **kwargs)) + + def total_seconds(self, *args, **kwargs): + return Series( + query_compiler=self._query_compiler.dt_total_seconds(*args, **kwargs) + ) + + def to_pytimedelta(self) -> "npt.NDArray[np.object_]": + res = self._query_compiler.dt_to_pytimedelta() + return res.to_numpy()[:, 0] + + @property + def seconds(self): + return Series(query_compiler=self._query_compiler.dt_seconds()) + + @property + def days(self): + return Series(query_compiler=self._query_compiler.dt_days()) + + @property + def microseconds(self): + return Series(query_compiler=self._query_compiler.dt_microseconds()) + + @property + def nanoseconds(self): + return Series(query_compiler=self._query_compiler.dt_nanoseconds()) + + 
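+ # A minimal usage sketch, assuming the ``import snowflake.snowpark.modin.pandas as pd``
+ # convention used elsewhere in this patch: this accessor is reached through ``Series.dt``,
+ # and its properties and methods generally delegate to the query compiler, returning a
+ # lazy Snowpark pandas Series.
+ #
+ #     s = pd.Series(pd.date_range("2021-01-01", periods=3, freq="D"))
+ #     s.dt.year        # delegates to dt_property("year")
+ #     s.dt.quarter     # delegates to dt_property("quarter")
+ #     s.dt.day_name()  # delegates to dt_day_name()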
@property + def components(self): + + return DataFrame(query_compiler=self._query_compiler.dt_components()) + + @property + def qyear(self): + return Series(query_compiler=self._query_compiler.dt_qyear()) + + @property + def start_time(self): + return Series(query_compiler=self._query_compiler.dt_start_time()) + + @property + def end_time(self): + return Series(query_compiler=self._query_compiler.dt_end_time()) + + def to_timestamp(self, *args, **kwargs): + return Series( + query_compiler=self._query_compiler.dt_to_timestamp(*args, **kwargs) + ) diff --git a/src/snowflake/snowpark/modin/pandas/shared_docs.py b/src/snowflake/snowpark/modin/pandas/shared_docs.py new file mode 100644 index 00000000000..20bdde253a9 --- /dev/null +++ b/src/snowflake/snowpark/modin/pandas/shared_docs.py @@ -0,0 +1,62 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# +# +# Code in this file may constitute partial or total reimplementation, or modification of +# existing code originally distributed by the pandas project, under the BSD 3-Clause License + +from __future__ import annotations + +_shared_docs: dict[str, str] = {} + +_shared_docs[ + "aggregate" +] = """ +Aggregate using one or more operations over the specified axis. + +Parameters +---------- +func : function, str, list or dict + Function to use for aggregating the data. If a function, must either + work when passed a {klass} or when passed to {klass}.apply. + + Accepted combinations are: + + - function + - string function name + - list of functions and/or function names, e.g. ``[np.sum, 'mean']`` + - dict of axis labels -> functions, function names or list of such. +{axis} +*args + Positional arguments to pass to `func`. +**kwargs + Keyword arguments to pass to `func`. + +Returns +------- +scalar, Snowpark pandas :class:`~snowflake.snowpark.modin.pandas.Series` or Snowpark pandas :class:`~snowflake.snowpark.modin.pandas.DataFrame` + + The return can be: + + * scalar : when Snowpark pandas Series.agg is called with single function + * Snowpark pandas :class:`~snowflake.snowpark.modin.pandas.Series` : when Snowpark pandas DataFrame.agg is called with a single function + * Snowpark pandas :class:`~snowflake.snowpark.modin.pandas.DataFrame` : when Snowpark pandas DataFrame.agg is called with several functions + + Return scalar, Snowpark pandas :class:`~snowflake.snowpark.modin.pandas.Series` or Snowpark pandas :class:`~snowflake.snowpark.modin.pandas.DataFrame`. + +Notes +----- +The aggregation operations are always performed over an axis, either the +index (default) or the column axis. This behavior is different from +`numpy` aggregation functions (`mean`, `median`, `prod`, `sum`, `std`, +`var`), where the default is to compute the aggregation of the flattened +array, e.g., ``numpy.mean(arr_2d)`` as opposed to +``numpy.mean(arr_2d, axis=0)``. + +`agg` is an alias for `aggregate`. Use the alias. + +Functions that mutate the passed object can produce unexpected +behavior or errors and are not supported. + +A passed user-defined-function will be passed a Series for evaluation. +{examples}""" diff --git a/src/snowflake/snowpark/modin/pandas/snow_partition_iterator.py b/src/snowflake/snowpark/modin/pandas/snow_partition_iterator.py new file mode 100644 index 00000000000..3529355b81b --- /dev/null +++ b/src/snowflake/snowpark/modin/pandas/snow_partition_iterator.py @@ -0,0 +1,111 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. 
+#
+
+from collections.abc import Iterator
+from typing import Any, Callable
+
+import pandas
+
+import snowflake.snowpark.modin.pandas.dataframe as DataFrame
+
+PARTITION_SIZE = 4096
+
+
+class SnowparkPandasRowPartitionIterator(Iterator):
+ """
+ Iterator on partitioned data used by DataFrame.iterrows and DataFrame.itertuples to iterate over axis=0 or rows.
+
+ SnowparkPandasRowPartitionIterator pulls table data in batches (where the number of rows = PARTITION_SIZE) to iterate
+ over rows. This prevents the table from being queried for every single row - the batch of rows pulled in is
+ converted to a native pandas DataFrame and completely iterated over before pulling in the next batch. This results
+ in one to_pandas() query per batch; no joins are ever performed in this implementation.
+
+ However, if enable_partition_with_native_pandas is set to False, it behaves just like the PartitionIterator where
+ an iloc call is made to the table to pull in every single row. This results in a join query being run for every single
+ row, which is inefficient because many more queries are issued. This option should be used when a Snowpark pandas
+ DataFrame or Series is to be returned, to avoid downloading and uploading the same data.
+
+ Parameters
+ ----------
+ df : DataFrame
+ The dataframe to iterate over.
+ func : callable
+ The function to get inner iterables from each partition.
+ enable_partition_with_native_pandas : bool, default False
+ When True, retrieve the table as partitions. Each partition is a pandas DataFrame which is iterated over until
+ exhausted, and the next partition is pulled in.
+ When False, iterate over the Snowpark pandas DataFrame directly row-by-row.
+ """
+
+ def __init__(
+ self,
+ df: DataFrame,
+ func: Callable,
+ enable_partition_with_native_pandas: bool = False,
+ ) -> None:
+ self.position = 0 # keep track of position in the iterator
+ # To avoid making a query per row to extract row data (like in DataFrame.iterrows and DataFrame.itertuples),
+ # a batch of rows of size PARTITION_SIZE is materialized at a time and converted to a pandas DataFrame.
+ # This uses fewer queries. Partitions are used instead of materializing the whole table since some tables
+ # are too large to be materialized in one go. PARTITION_SIZE is arbitrary and can be tuned for performance.
+ self.df = df
+ self.func = func
+ self.enable_partition_with_native_pandas = enable_partition_with_native_pandas
+ # TODO SNOW-1017263: update to_pandas() to return an iterator and use that directly here.
+ if self.enable_partition_with_native_pandas:
+ self.partition = self.get_next_partition()
+ self.num_rows = -1 # unused
+ else:
+ self.partition = None # unused
+ # The call below triggers eager evaluation for row count - it is used as a stopping condition to raise
+ # StopIteration for the iterator.
+ self.num_rows = len(self.df)
+
+ def __iter__(self) -> "SnowparkPandasRowPartitionIterator":
+ """
+ Implement iterator interface.
+
+ Returns
+ -------
+ SnowparkPandasRowPartitionIterator
+ Iterator object.
+ """
+ return self
+
+ def __next__(self) -> Any:
+ """
+ Implement iterator interface.
+
+ Returns
+ -------
+ Any
+ Next element in the SnowparkPandasRowPartitionIterator after the callable func is applied.
+ """
+ # self.position is used to get the integer location of rows.
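+ # Two code paths follow, matching the class docstring:
+ # - partitioned mode (enable_partition_with_native_pandas=True): read the row from the
+ #   current native pandas partition with iloc, and fetch the next PARTITION_SIZE-row
+ #   batch once the current one is exhausted;
+ # - row-by-row mode: issue an iloc lookup against the Snowpark pandas DataFrame for each
+ #   row until num_rows is reached.
+ # Both paths apply self.func to the extracted row before returning it.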
+ if self.enable_partition_with_native_pandas: + if len(self.partition) <= self.position % PARTITION_SIZE: + raise StopIteration + ser = self.partition.iloc[self.position % PARTITION_SIZE] + self.position += 1 + if self.position and self.position % PARTITION_SIZE == 0: + # Finished iterating through the current partition, fetch the next partition. + self.partition = self.get_next_partition() + return self.func(ser) + else: + if self.position < self.num_rows: + ser = self.df.iloc[self.position] + self.position += 1 + return self.func(ser) + else: + raise StopIteration + + def get_next_partition(self) -> pandas.DataFrame: + """ + Helper method to retrieve a partition of table data of size PARTITION_SIZE number of rows. + """ + return self.df.iloc[ + slice(self.position, self.position + PARTITION_SIZE) + ].to_pandas() diff --git a/src/snowflake/snowpark/modin/pandas/utils.py b/src/snowflake/snowpark/modin/pandas/utils.py new file mode 100644 index 00000000000..f458a1d970b --- /dev/null +++ b/src/snowflake/snowpark/modin/pandas/utils.py @@ -0,0 +1,740 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +# Code in this file may constitute partial or total reimplementation, or modification of +# existing code originally distributed by the Modin project, under the Apache License, +# Version 2.0. + +"""Implement utils for pandas component.""" + +from collections.abc import Hashable, Iterator, Sequence +from types import BuiltinFunctionType +from typing import Any, Callable, Optional, Union + +import numpy as np +import pandas +from pandas._typing import ( + AggFuncType, + AggFuncTypeBase, + AggFuncTypeDict, + AnyArrayLike, + IndexLabel, + Scalar, +) +from pandas.core.dtypes.common import is_array_like, is_dict_like, is_list_like +from pandas.errors import SpecificationError +from pandas.util._decorators import doc + +import snowflake.snowpark.modin.pandas as pd +from snowflake.snowpark.modin.core.execution.dispatching.factories.dispatcher import ( + FactoryDispatcher, +) +from snowflake.snowpark.modin.plugin._internal.aggregation_utils import ( + get_pandas_aggr_func_name, +) +from snowflake.snowpark.modin.plugin.compiler import BaseQueryCompiler +from snowflake.snowpark.modin.plugin.utils.error_message import ErrorMessage +from snowflake.snowpark.modin.utils import hashable + +_doc_binary_operation = """ +Return {operation} of {left} and `{right}` (binary operator `{bin_op}`). + +Parameters +---------- +{right} : {right_type} + The second operand to perform computation. + +Returns +------- +{returns} +""" + + +def from_non_pandas(df, index, columns, dtype): + """ + Convert a non-pandas DataFrame into Modin DataFrame. + + Parameters + ---------- + df : object + Non-pandas DataFrame. 
+ index : object + Index for non-pandas DataFrame. + columns : object + Columns for non-pandas DataFrame. + dtype : type + Data type to force. + + Returns + ------- + modin.pandas.DataFrame + Converted DataFrame. + """ + # from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher + + new_qc = FactoryDispatcher.from_non_pandas(df, index, columns, dtype) + if new_qc is not None: + from snowflake.snowpark.modin.pandas import DataFrame + + return DataFrame(query_compiler=new_qc) + return new_qc + + +def from_pandas(df): + """ + Convert a pandas DataFrame to a Modin DataFrame. + + Parameters + ---------- + df : pandas.DataFrame + The pandas DataFrame to convert. + + Returns + ------- + modin.pandas.DataFrame + A new Modin DataFrame object. + """ + # from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher + from snowflake.snowpark.modin.pandas import DataFrame + + return DataFrame(query_compiler=FactoryDispatcher.from_pandas(df)) + + +def from_arrow(at): + """ + Convert an Arrow Table to a Modin DataFrame. + + Parameters + ---------- + at : Arrow Table + The Arrow Table to convert from. + + Returns + ------- + DataFrame + A new Modin DataFrame object. + """ + from snowflake.snowpark.modin.core.execution.dispatching.factories.dispatcher import ( + FactoryDispatcher, + ) + from snowflake.snowpark.modin.pandas import DataFrame + + return DataFrame(query_compiler=FactoryDispatcher.from_arrow(at)) + + +def from_dataframe(df): + """ + Convert a DataFrame implementing the dataframe exchange protocol to a Modin DataFrame. + + See more about the protocol in https://data-apis.org/dataframe-protocol/latest/index.html. + + Parameters + ---------- + df : DataFrame + The DataFrame object supporting the dataframe exchange protocol. + + Returns + ------- + DataFrame + A new Modin DataFrame object. + """ + from snowflake.snowpark.modin.core.execution.dispatching.factories.dispatcher import ( + FactoryDispatcher, + ) + from snowflake.snowpark.modin.pandas import DataFrame + + return DataFrame(query_compiler=FactoryDispatcher.from_dataframe(df)) + + +def is_scalar(obj): + """ + Return True if given object is scalar. + + This method works the same as is_scalar method from pandas but + it is optimized for Modin frames. For BasePandasDataset objects + pandas version of is_scalar tries to access missing attribute + causing index scan. This triggers execution for lazy frames and + we avoid it by handling BasePandasDataset objects separately. + + Parameters + ---------- + obj : object + Object to check. + + Returns + ------- + bool + True if given object is scalar and False otherwise. + """ + from pandas.api.types import is_scalar as pandas_is_scalar + + from .base import BasePandasDataset + + return not isinstance(obj, BasePandasDataset) and pandas_is_scalar(obj) + + +def is_full_grab_slice(slc, sequence_len=None): + """ + Check that the passed slice grabs the whole sequence. + + Parameters + ---------- + slc : slice + Slice object to check. + sequence_len : int, optional + Length of the sequence to index with the passed `slc`. + If not specified the function won't be able to check whether + ``slc.stop`` is equal or greater than the sequence length to + consider `slc` to be a full-grab, and so, only slices with + ``.stop is None`` are considered to be a full-grab. 
+ + Returns + ------- + bool + """ + assert isinstance(slc, slice), "slice object required" + return ( + slc.start in (None, 0) + and slc.step in (None, 1) + and ( + slc.stop is None or (sequence_len is not None and slc.stop >= sequence_len) + ) + ) + + +def from_modin_frame_to_mi(df, sortorder=None, names=None): + """ + Make a pandas.MultiIndex from a DataFrame. + + Parameters + ---------- + df : DataFrame + DataFrame to be converted to pandas.MultiIndex. + sortorder : int, default: None + Level of sortedness (must be lexicographically sorted by that + level). + names : list-like, optional + If no names are provided, use the column names, or tuple of column + names if the columns is a MultiIndex. If a sequence, overwrite + names with the given sequence. + + Returns + ------- + pandas.MultiIndex + The pandas.MultiIndex representation of the given DataFrame. + """ + from snowflake.snowpark.modin.pandas import DataFrame + + if isinstance(df, DataFrame): + df = df._to_pandas() + return _original_pandas_MultiIndex_from_frame(df, sortorder, names) + + +def is_label(obj, label, axis=0): + """ + Check whether or not 'obj' contain column or index level with name 'label'. + + Parameters + ---------- + obj : modin.pandas.DataFrame, modin.pandas.Series or modin.core.storage_formats.base.BaseQueryCompiler + Object to check. + label : object + Label name to check. + axis : {0, 1}, default: 0 + Axis to search for `label` along. + + Returns + ------- + bool + True if check is successful, False otherwise. + """ + qc = getattr(obj, "_query_compiler", obj) + return hashable(label) and ( + label in qc.get_axis(axis ^ 1) or label in qc.get_index_names(axis) + ) + + +def check_both_not_none(option1, option2): + """ + Check that both `option1` and `option2` are not None. + + Parameters + ---------- + option1 : Any + First object to check if not None. + option2 : Any + Second object to check if not None. + + Returns + ------- + bool + True if both option1 and option2 are not None, False otherwise. + """ + return not (option1 is None or option2 is None) + + +def _walk_aggregation_func( + key: IndexLabel, value: AggFuncType, depth: int = 0 +) -> Iterator[tuple[IndexLabel, AggFuncTypeBase, Optional[str], bool]]: + """ + Walk over a function from a dictionary-specified aggregation. + + Note: this function is not supposed to be called directly and + is used by ``walk_aggregation_dict``. + + Parameters + ---------- + key : IndexLabel + A key in a dictionary-specified aggregation for the passed `value`. + This means an index label to apply the `value` functions against. + value : AggFuncType + An aggregation function matching the `key`. + depth : int, default: 0 + Specifies a nesting level for the `value` where ``depth=0`` is when + you call the function on a raw dictionary value. + + Yields + ------ + (col: IndexLabel, func: AggFuncTypeBase, func_name: Optional[str], col_renaming_required: bool) + Yield an aggregation function with its metadata: + - `col`: column name to apply the function. + - `func`: aggregation function to apply to the column. + - `func_name`: custom function name that was specified in the dict. + - `col_renaming_required`: whether it's required to rename the + `col` into ``(col, func_name)``. + """ + col_renaming_required = bool(depth) + + if isinstance(value, (list, tuple)): + if depth == 0: + for val in value: + yield from _walk_aggregation_func(key, val, depth + 1) + elif depth == 1: + if len(value) != 2: + raise ValueError( + f"Incorrect rename format. 
Renamer must consist of exactly two elements, got: {len(value)}." + ) + func_name, func = value + yield key, func, func_name, col_renaming_required + else: + # pandas doesn't support this as well + ErrorMessage.not_implemented( + "Nested renaming is not supported." + ) # pragma: no cover + else: + yield key, value, None, col_renaming_required + + +def walk_aggregation_dict( + agg_dict: AggFuncTypeDict, +) -> Iterator[tuple[IndexLabel, AggFuncTypeBase, Optional[str], bool]]: + """ + Walk over an aggregation dictionary. + + Parameters + ---------- + agg_dict : AggFuncTypeDict + + Yields + ------ + (col: IndexLabel, func: AggFuncTypeBase, func_name: Optional[str], col_renaming_required: bool) + Yield an aggregation function with its metadata: + - `col`: column name to apply the function. + - `func`: aggregation function to apply to the column. + - `func_name`: custom function name that was specified in the dict. + - `col_renaming_required`: whether it's required to rename the + `col` into ``(col, func_name)``. + """ + for key, value in agg_dict.items(): + yield from _walk_aggregation_func(key, value) + + +def raise_if_native_pandas_objects(obj: Any) -> None: + """ + Raise TypeError if provided object is of type pandas.Series or pandas.DataFrame + + Args: + obj: object to check + + Raises: + TypeError if provided ``obj`` is either native pandas DataFrame or Series + + """ + if isinstance(obj, (pandas.DataFrame, pandas.Series)): + raise TypeError( + f"{type(obj)} is not supported as 'value' argument. Please convert this to " + "Snowpark pandas objects by calling modin.pandas.Series()/DataFrame()" + ) + + +def replace_external_data_keys_with_empty_pandas_series( + keys: Optional[ + Union[Hashable, AnyArrayLike, Sequence[Union[Hashable, AnyArrayLike]]] + ] = None +) -> Optional[Union[Hashable, pandas.Series, list[Union[Hashable, pandas.Series]]]]: + """ + Replace any array-like key with empty series. + Args: + keys: join key or sequence of join keys. + + Returns: + Join key(s) by replacing array-like join key with empty series. + """ + if keys is None: + return None + if is_array_like(keys): + return create_empty_pandas_series_from_array_like(keys) + if isinstance(keys, (list, tuple)): + return [ + create_empty_pandas_series_from_array_like(key) + if is_array_like(key) + else key + for key in keys + ] + return keys + + +def create_empty_pandas_series_from_array_like(obj: AnyArrayLike) -> pandas.Series: + """ + Create empty (zero rows) native pandas series from given array-like object. + Args: + obj: array-like object + + Returns: + Native pandas series with zero rows. + + """ + assert is_array_like(obj) + # Snowpark pandas series. + if isinstance(obj, pd.Series): + return create_empty_native_pandas_frame(obj).squeeze() + # Everything else first gets converted to pandas.Series + if not isinstance(obj, pandas.Series): + obj = pandas.Series(obj) + # Create empty series by calling head with zero rows. + return obj.head(0) + + +def create_empty_native_pandas_frame( + obj: Union["pd.Series", "pd.DataFrame"] +) -> pandas.DataFrame: + """ + Create an empty native pandas DataFrame using the columns and index labels info from + the given object. Empty here implies zero rows. + + Args: + obj: Snowflake Series or DataFrame. + + Returns: + A native pandas DataFrame with 0 rows in it. 
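+
+ Example (illustrative sketch): for a Snowpark pandas DataFrame with columns
+ ['a', 'b'] and the default unnamed single-level index, this returns the equivalent of
+ ``pandas.DataFrame(columns=['a', 'b'], index=pandas.Index([], name=None))``,
+ i.e. the same column labels and index names but zero rows.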
+ """ + qc = obj._query_compiler + index_names = qc.get_index_names() + index = ( + pandas.MultiIndex.from_tuples(tuples=[], names=index_names) + if len(index_names) > 1 + else pandas.Index(data=[], name=index_names[0]) + ) + return pandas.DataFrame(columns=qc.columns, index=index) + + +def replace_external_data_keys_with_query_compiler( + frame: "pd.DataFrame", + keys: Optional[ + Union[Hashable, AnyArrayLike, Sequence[Union[Hashable, AnyArrayLike]]] + ] = None, +) -> Optional[ + Union[Hashable, BaseQueryCompiler, list[Union[Hashable, BaseQueryCompiler]]] +]: + """ + Replace any array-like join key(s) with query compiler. + + Args: + frame: dataframe, join keys belong to. + keys: join key or sequence of join keys. + + Returns: + List of join keys by replacing array-like join keys with query compiler. + + """ + if keys is None: + return None + if not isinstance(keys, (list, tuple)): + keys = [keys] + replaced_keys = [] + for key in keys: + if is_array_like(key): + raise_if_native_pandas_objects(key) + if not isinstance(key, pd.Series): + key = pd.Series(key) + # Native pandas raises + # ValueError: The truth value of an array with more than one element is ambiguous + # Error message is not very helpful. We instead raise error with + # more helpful message. + if frame.shape[0] != key.shape[0]: + raise ValueError( + "array-like join key must be of same length as dataframe" + ) + replaced_keys.append(key._query_compiler) + else: + replaced_keys.append(key) + return replaced_keys + + +def try_convert_builtin_func_to_str( + fn: Union[AggFuncTypeBase, list[AggFuncTypeBase]], obj: object +) -> Union[AggFuncTypeBase, list[AggFuncTypeBase]]: + """ + Try to convert an aggregation function to a string or list of such if the function is a + builtin function and supported in the current object dir. + + This is mainly required by our server backend aggregation function mapping, which requires the + function to be in string format or numpy function (numpy function is handled differently because + it can potentially have different behavior as builtin function, For example: np.percentile and + percentile have different behavior). For any function that can not find a map in snowflake, it will + go through fallback, includes function that is not a numpy function and can not be converted to + string format. + + Args: + fn : callable, str, or list of above + obj : the object to search for function dir + + Returns: + str, callable or list of above + If `fn` is a callable, return its name if it's a builtin function (i.e. min, max) + and it is a method of the current object, otherwise return `fn` itself. + If `fn` is a string, return it. + If `fn` is an Iterable, return a list of try_convert_func_to_str applied to + each element of `fn`. + """ + + def _try_convert_single_builtin_func_to_str(f): + return ( + f.__name__ + if ( + callable(f) + and isinstance(f, BuiltinFunctionType) + and f.__name__ in dir(obj) + ) + else f + ) + + if is_list_like(fn): + return [_try_convert_single_builtin_func_to_str(f) for f in fn] + else: + return _try_convert_single_builtin_func_to_str(fn) + + +def validate_and_try_convert_agg_func_arg_func_to_str( + agg_func: AggFuncType, obj: object, allow_duplication: bool, axis: int +) -> AggFuncType: + """ + Perform validation on the func argument for aggregation, and try to convert builtin function in agg_func to str. + Following validation is performed: + 1) Argument agg_func can not be None. 
+ 2) If agg_func is dict like, the values of the dict can not be dict like, and if the aggregation is across axis=0, + all keys must be a valid column of the object. When axis=1, we do not check if the labels are present in the index + to avoid the extra query needed to materialize it. + 3) If allow_duplication is False, more than one aggregation function with the same name can not be applied on the + same column. For example: [min, max, min] is not valid. This is mainly used by general aggregation. + + This function also calls try_convert_func_to_str on agg_func to convert the builtin functions used in agg_func to + str but keep the original dict like or list like format. This is mainly required by our server backend aggregation + function mapping, which requires the function to be in string format or numpy function (numpy function is handled + differently because it can potentially have different behavior as builtin function, For example: np.percentile and + percentile have different behavior). For any function that can not find a map in snowflake, it will + go through fallback, includes function that is not a numpy function and can not be converted to string format. + + Args: + agg_func: AggFuncType + The func arg passed for the aggregation + obj: object + The object to search for attributes + allow_duplication: bool + Whether allow duplicated function with the same name. Note that numpy functions has different function + name compare with the equivalent builtin function, for example, np.min and min have different + names ('amin' and 'min'). However, this behavior is changing with python 3.9, + where np.min will have the same name 'min'. + axis: int + The axis across which the aggregation is applied. + + Returns: + Processed aggregation function arg with builtin function converted to name + Raises: + SpecificationError + If nested dict configuration is used when agg_func is dict like or functions with duplicated names. + + """ + if agg_func is None: + # Snowpark pandas only support func argument at this moment. + # TODO (SNOW-902943): pandas allows usage of NamedAgg in kwargs to configure + # tuples of (columns, agg_func) with rename. For example: + # df.groupby('A').agg(b_min=pd.NamedAgg(column='B', aggfunc='min')), which applies + # min function on column 'B', and uses 'b_min' as the new column name. + # Once supported, refine the check to check both. + ErrorMessage.not_implemented( + "Must provide value for 'func' argument, func=None is currently not supported with Snowpark pandas" + ) + + if callable(agg_func): + result_agg_func = try_convert_builtin_func_to_str(agg_func, obj) + elif is_dict_like(agg_func): + # A dict like func input should in format like {'col1': max, 'col2': [min, np.max]}, where each + # entry have key as the data column label, and value as the aggregation functions to apply on + # the column. Following checks and process will be performed if the input is dict like: + # 1) Perform check for the dict entries to make sure all columns belongs to the data columns, and + # no nested dictionary is used in the configuration. + # 2) Perform a processing to the values (aggregation function) to convert the function to string + # format if possible. 
For example, {'col1': max, 'col2': [min, np.max]} will be processed to + # {'col1': 'max', 'col2': ['min', np.max]} + + # check if there is any value also in dictionary format, which is not allowed in pandas + if any(is_dict_like(fn) for fn in agg_func.values()): + raise SpecificationError( + "Value for func argument with nested dict format is not allowed." + ) + if any(is_list_like(fn) and len(fn) == 0 for fn in agg_func.values()): + # A label must have aggregations provided, e.g. df.agg({0: []}) is illegal + raise ValueError("No objects to concatenate") + # check that all columns in the dictionary exists in the data columns of the current dataframe + columns = obj._query_compiler.columns + if axis == 0: + # If axis == 1, we would need a query to materialize the index to check its existence + # so we defer the error checking to later. + for i in agg_func.keys(): + if i not in columns: + raise KeyError(f"Column(s) ['{i}'] do not exist") + + func_dict = { + label: try_convert_builtin_func_to_str(fn, obj) + for label, fn in agg_func.items() + } + + result_agg_func = func_dict + elif is_list_like(agg_func): + # When the input func is in list like format like [min, max, np.sum], perform a processing to the + # aggregation function to convert it to string representation if possible. + result_agg_func = try_convert_builtin_func_to_str(agg_func, obj) + else: + result_agg_func = agg_func + + if not allow_duplication: + # if allow_duplication is False, check is there duplication in the function names, which + # are used as the row label for the aggregation result in dataframe/series aggregation, and + # not allowed in pandas. + found_duplication = False + if is_dict_like(result_agg_func): + for agg_func in result_agg_func.values(): + if is_list_like(agg_func): + agg_func_names = [get_pandas_aggr_func_name(fn) for fn in agg_func] + found_duplication = len(agg_func_names) > len(set(agg_func_names)) + break + elif is_list_like(result_agg_func): + agg_func_names = [get_pandas_aggr_func_name(fn) for fn in result_agg_func] + found_duplication = len(agg_func_names) > len(set(agg_func_names)) + + if found_duplication: + raise SpecificationError("Function names must be unique!") + + return result_agg_func + + +def _doc_binary_op(operation, bin_op, left="Series", right="right", returns="Series"): + """ + Return callable documenting `Series` or `DataFrame` binary operator. + + Parameters + ---------- + operation : str + Operation name. + bin_op : str + Binary operation name. + left : str, default: 'Series' + The left object to document. + right : str, default: 'right' + The right operand name. + returns : str, default: 'Series' + Type of returns. 
+ + Returns + ------- + callable + """ + if left == "Series": + right_type = "Series or scalar value" + elif left == "DataFrame": + right_type = "DataFrame, Series or scalar value" + elif left == "BasePandasDataset": + right_type = "BasePandasDataset or scalar value" + else: + ErrorMessage.not_implemented( + f"Only 'BasePandasDataset', `DataFrame` and 'Series' `left` are allowed, actually passed: {left}" + ) # pragma: no cover + doc_op = doc( + _doc_binary_operation, + operation=operation, + right=right, + right_type=right_type, + bin_op=bin_op, + returns=returns, + left=left, + ) + + return doc_op + + +def get_as_shape_compatible_dataframe_or_series( + other: Union["pd.DataFrame", "pd.Series", Callable, AnyArrayLike, Scalar], + reference_df: "pd.DataFrame", + shape_mismatch_message: Optional[ + str + ] = "Array conditional must be same shape as self", +) -> Union["pd.DataFrame", "pd.Series"]: + """ + Get the "other" type as a shape compatible dataframe or series using the reference_df as a reference for + compatible shape and construction. If there is no shape on the other type then wrap as a numpy array. + + Parameters + ---------- + other : Other type which could be array like + reference_df : Reference dataframe or series + + Returns + ------- + Dataframe or series that contains same values as other + """ + if not hasattr(other, "shape"): + # If an array type is provided that doesn't have a shape, then wrap it so it has a shape. + # For example, if other=[1,2,3] then np.asanyarray will wrap as a numpy array with correct shape, + # ie, np.anyarray(other).shape=(3,) in this case. + other = np.asanyarray(other) + + if len(other.shape) == 0 or other.shape != reference_df.shape: + raise ValueError(shape_mismatch_message) + + if "columns" in reference_df: + other = reference_df.__constructor__( + other, index=reference_df.index, columns=reference_df.columns + ) + else: + other = reference_df.__constructor__(other, index=reference_df.index) + + return other + + +_original_pandas_MultiIndex_from_frame = pandas.MultiIndex.from_frame +pandas.MultiIndex.from_frame = from_modin_frame_to_mi diff --git a/src/snowflake/snowpark/modin/pandas/window.py b/src/snowflake/snowpark/modin/pandas/window.py new file mode 100644 index 00000000000..bf71341a430 --- /dev/null +++ b/src/snowflake/snowpark/modin/pandas/window.py @@ -0,0 +1,463 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +# Code in this file may constitute partial or total reimplementation, or modification of +# existing code originally distributed by the Modin project, under the Apache License, +# Version 2.0. 
+ +"""Implement Window and Rolling public API.""" +from typing import Any, Literal, Optional, Union + +import numpy as np # noqa: F401 +import pandas.core.window.rolling + +from snowflake.snowpark.dataframe import DataFrame as SnowparkDataFrame + +# add these two lines to enable doc tests to run +from snowflake.snowpark.modin import pandas as pd # noqa: F401 +from snowflake.snowpark.modin.plugin._internal.telemetry import TelemetryMeta +from snowflake.snowpark.modin.utils import ( + _inherit_docstrings, + doc_replace_dataframe_with_link, +) + + +@_inherit_docstrings( + pandas.core.window.rolling.Window, modify_doc=doc_replace_dataframe_with_link +) +# TODO SNOW-1041934: Add support for more window aggregations +class Window(metaclass=TelemetryMeta): + def __init__( + self, + dataframe, + window: Any = None, + min_periods: int = None, + center: bool = False, + win_type: str = None, + on: str = None, + axis: Union[int, str] = 0, + closed: str = None, + step: int = None, + method: str = "single", + ) -> None: + # TODO: SNOW-1063357: Modin upgrade - modin.pandas.window.Window + self._dataframe = dataframe + self._query_compiler = dataframe._query_compiler + self.window_kwargs = { # pragma: no cover + "window": window, + "min_periods": min_periods, + "center": center, + "win_type": win_type, + "on": on, + "axis": axis, + "closed": closed, + "step": step, + "method": method, + } + self.axis = axis + + def mean(self, *args, **kwargs): + # TODO: SNOW-1063357: Modin upgrade - modin.pandas.window.Window + return self._dataframe.__constructor__( + query_compiler=self._query_compiler.window_mean( + self.axis, self.window_kwargs, *args, **kwargs + ) + ) + + def sum(self, *args, **kwargs): + # TODO: SNOW-1063357: Modin upgrade - modin.pandas.window.Window + return self._dataframe.__constructor__( + query_compiler=self._query_compiler.window_sum( + self.axis, self.window_kwargs, *args, **kwargs + ) + ) + + def var(self, ddof=1, *args, **kwargs): + return self._dataframe.__constructor__( + query_compiler=self._query_compiler.window_var( + self.axis, self.window_kwargs, ddof, *args, **kwargs + ) + ) + + def std(self, ddof=1, *args, **kwargs): + # TODO: SNOW-1063357: Modin upgrade - modin.pandas.window.Window + return self._dataframe.__constructor__( + query_compiler=self._query_compiler.window_std( + self.axis, self.window_kwargs, ddof, *args, **kwargs + ) + ) + + +@_inherit_docstrings( + pandas.core.window.rolling.Rolling, + excluded=[pandas.core.window.rolling.Rolling.__init__], + modify_doc=doc_replace_dataframe_with_link, +) +# TODO SNOW-1041934: Add support for more window aggregations +class Rolling(metaclass=TelemetryMeta): + def __init__( + self, + dataframe, + window: Any, + min_periods: Optional[int] = None, + center: bool = False, + win_type: Optional[str] = None, + on: Optional[str] = None, + axis: Union[int, str] = 0, + closed: Optional[str] = None, + step: Optional[int] = None, + method: str = "single", + ) -> None: + # TODO: SNOW-1063358: Modin upgrade - modin.pandas.window.Rolling + # Raise ValueError when invalid parameter values/combinations + if (isinstance(window, int) and window <= 0) or window is None: + raise ValueError("window must be an integer 0 or greater") + if not isinstance(center, bool): + raise ValueError("center must be a boolean") + if min_periods is not None and not isinstance(min_periods, int): + raise ValueError("min_periods must be an integer") + if isinstance(min_periods, int) and min_periods < 0: + raise ValueError("min_periods must be >= 0") + if ( + 
isinstance(min_periods, int) + and isinstance(window, int) + and min_periods > window + ): + raise ValueError(f"min_periods {min_periods} must be <= window {window}") + + self._dataframe = dataframe + self._query_compiler = dataframe._query_compiler + self.rolling_kwargs = { + "window": window, + "min_periods": min_periods, + "center": center, + "win_type": win_type, + "on": on, + "axis": axis, + "closed": closed, + "step": step, + "method": method, + } + self.axis = axis + + def _call_qc_method(self, method_name, *args, **kwargs): + """ + Call a query compiler method for the specified rolling aggregation. + + Parameters + ---------- + method_name : str + Name of the aggregation. + *args : tuple + Positional arguments to pass to the query compiler method. + **kwargs : dict + Keyword arguments to pass to the query compiler method. + + Returns + ------- + BaseQueryCompiler + QueryCompiler holding the result of the aggregation. + """ + # TODO: SNOW-1063358: Modin upgrade - modin.pandas.window.Rolling + qc_method = getattr(self._query_compiler, f"rolling_{method_name}") + return qc_method(self.axis, self.rolling_kwargs, *args, **kwargs) + + def _aggregate(self, method_name, *args, **kwargs): + """ + Run the specified rolling aggregation. + + Parameters + ---------- + method_name : str + Name of the aggregation. + *args : tuple + Positional arguments to pass to the aggregation. + **kwargs : dict + Keyword arguments to pass to the aggregation. + + Returns + ------- + DataFrame or Series + Result of the aggregation. + """ + # TODO: SNOW-1063358: Modin upgrade - modin.pandas.window.Rolling + qc_result = self._call_qc_method(method_name, *args, **kwargs) + return self._dataframe.__constructor__(query_compiler=qc_result) + + def count(self, numeric_only: bool = False): + # TODO: SNOW-1063358: Modin upgrade - modin.pandas.window.Rolling + return self._aggregate(method_name="count", numeric_only=numeric_only) + + def sem( + self, + ddof: int = 1, + numeric_only: bool = False, + *args: Any, + **kwargs: Any, + ): + # TODO: SNOW-1063358: Modin upgrade - modin.pandas.window.Rolling + return self._aggregate( + method_name="sem", ddof=ddof, numeric_only=numeric_only, *args, **kwargs + ) + + def sum( + self, + numeric_only: bool = False, + *args: Any, + engine: Optional[Literal["cython", "numba"]] = None, + engine_kwargs: Optional[dict[str, bool]] = None, + **kwargs: Any, + ): + # TODO: SNOW-1063358: Modin upgrade - modin.pandas.window.Rolling + return self._aggregate( + method_name="sum", + numeric_only=numeric_only, + engine=engine, + engine_kwargs=engine_kwargs, + *args, + **kwargs, + ) + + def mean( + self, + numeric_only: bool = False, + *args: Any, + engine: Optional[Literal["cython", "numba"]] = None, + engine_kwargs: Optional[dict[str, bool]] = None, + **kwargs: Any, + ): + # TODO: SNOW-1063358: Modin upgrade - modin.pandas.window.Rolling + return self._aggregate( + method_name="mean", + numeric_only=numeric_only, + engine=engine, + engine_kwargs=engine_kwargs, + *args, + **kwargs, + ) + + def median( + self, + numeric_only: bool = False, + engine: Optional[Literal["cython", "numba"]] = None, + engine_kwargs: Optional[dict[str, bool]] = None, + **kwargs: Any, + ): + # TODO: SNOW-1063358: Modin upgrade - modin.pandas.window.Rolling + return self._aggregate( + method_name="median", + numeric_only=numeric_only, + engine=engine, + engine_kwargs=engine_kwargs, + **kwargs, + ) + + def var( + self, + ddof: int = 1, + numeric_only: bool = False, + *args: Any, + engine: Optional[Literal["cython", "numba"]] = 
None, + engine_kwargs: Optional[dict[str, bool]] = None, + **kwargs: Any, + ): + # TODO: SNOW-1063358: Modin upgrade - modin.pandas.window.Rolling + return self._aggregate( + method_name="var", + ddof=ddof, + numeric_only=numeric_only, + engine=engine, + engine_kwargs=engine_kwargs, + *args, + **kwargs, + ) + + def std( + self, + ddof: int = 1, + numeric_only: bool = False, + *args: Any, + engine: Optional[Literal["cython", "numba"]] = None, + engine_kwargs: Optional[dict[str, bool]] = None, + **kwargs: Any, + ): + # TODO: SNOW-1063358: Modin upgrade - modin.pandas.window.Rolling + return self._aggregate( + method_name="std", + ddof=ddof, + numeric_only=numeric_only, + engine=engine, + engine_kwargs=engine_kwargs, + *args, + **kwargs, + ) + + def min( + self, + numeric_only: bool = False, + *args: Any, + engine: Optional[Literal["cython", "numba"]] = None, + engine_kwargs: Optional[dict[str, bool]] = None, + **kwargs: Any, + ): + # TODO: SNOW-1063358: Modin upgrade - modin.pandas.window.Rolling + return self._aggregate( + method_name="min", + numeric_only=numeric_only, + engine=engine, + engine_kwargs=engine_kwargs, + *args, + **kwargs, + ) + + def max( + self, + numeric_only: bool = False, + *args: Any, + engine: Optional[Literal["cython", "numba"]] = None, + engine_kwargs: Optional[dict[str, bool]] = None, + **kwargs: Any, + ): + # TODO: SNOW-1063358: Modin upgrade - modin.pandas.window.Rolling + return self._aggregate( + method_name="max", + numeric_only=numeric_only, + engine=engine, + engine_kwargs=engine_kwargs, + *args, + **kwargs, + ) + + def corr( + self, + other: Optional[SnowparkDataFrame] = None, + pairwise: Optional[bool] = None, + ddof: int = 1, + numeric_only: bool = False, + **kwargs: Any, + ): + # TODO: SNOW-1063358: Modin upgrade - modin.pandas.window.Rolling + return self._aggregate( + method_name="corr", + other=other, + pairwise=pairwise, + ddof=ddof, + numeric_only=numeric_only, + **kwargs, + ) + + def cov( + self, + other: Optional[SnowparkDataFrame] = None, + pairwise: Optional[bool] = None, + ddof: int = 1, + numeric_only: bool = False, + **kwargs: Any, + ): + # TODO: SNOW-1063358: Modin upgrade - modin.pandas.window.Rolling + return self._aggregate( + method_name="cov", + other=other, + pairwise=pairwise, + ddof=ddof, + numeric_only=numeric_only, + **kwargs, + ) + + def skew( + self, + numeric_only: bool = False, + **kwargs: Any, + ): + # TODO: SNOW-1063358: Modin upgrade - modin.pandas.window.Rolling + return self._aggregate(method_name="skew", numeric_only=numeric_only, **kwargs) + + def kurt( + self, + numeric_only: bool = False, + **kwargs: Any, + ): + # TODO: SNOW-1063358: Modin upgrade - modin.pandas.window.Rolling + return self._aggregate(method_name="kurt", numeric_only=numeric_only, **kwargs) + + def apply( + self, + func: Any, + raw: bool = False, + engine: Optional[Literal["cython", "numba"]] = None, + engine_kwargs: Optional[dict[str, bool]] = None, + args: Optional[tuple] = None, + kwargs: Optional[dict] = None, + ): + # TODO: SNOW-1063358: Modin upgrade - modin.pandas.window.Rolling + return self._aggregate( + method_name="apply", + func=func, + raw=raw, + engine=engine, + engine_kwargs=engine_kwargs, + args=args, + kwargs=kwargs, + ) + + def aggregate( + self, + func: Union[str, list, dict], + *args: Any, + **kwargs: Any, + ): + # TODO: SNOW-1063358: Modin upgrade - modin.pandas.window.Rolling + return self._aggregate(method_name="aggregate", func=func, *args, **kwargs) + + agg = aggregate + + def quantile( + self, + quantile: float, + 
interpolation: str = "linear", + numeric_only: bool = False, + **kwargs: Any, + ): + # TODO: SNOW-1063358: Modin upgrade - modin.pandas.window.Rolling + return self._aggregate( + method_name="quantile", + quantile=quantile, + interpolation=interpolation, + numeric_only=numeric_only, + **kwargs, + ) + + def rank( + self, + method: str = "average", + ascending: bool = True, + pct: bool = False, + numeric_only: bool = False, + **kwargs, + ): + # TODO: SNOW-1063358: Modin upgrade - modin.pandas.window.Rolling + return self._aggregate( + method_name="rank", + method=method, + ascending=ascending, + pct=pct, + numeric_only=numeric_only, + **kwargs, + ) + + +# TODO: SNOW-1063366: Modin upgrade - modin.pandas.window.Expanding diff --git a/src/snowflake/snowpark/modin/plugin/PANDAS_CHANGELOG.md b/src/snowflake/snowpark/modin/plugin/PANDAS_CHANGELOG.md new file mode 100644 index 00000000000..9d1e7b6b698 --- /dev/null +++ b/src/snowflake/snowpark/modin/plugin/PANDAS_CHANGELOG.md @@ -0,0 +1,312 @@ +## 1.15.0a1 (tbd) + +### Bug Fixes +- Fixed overriding of subclasses' property docstrings for modin issue https://github.com/modin-project/modin/issues/7113. + +## 1.14.0a2 (2024-04-18) + +### Behavior Changes +- The `to_pandas` operation converts all integers to int64, instead of int8, int16 etc. To get an exact type, the user needs to explicitly specify precision values for their Snowflake column. This is a general behavior change across all of Snowpark. +- The following API changes are made to align Snowpark pandas with the pandas 2.2.1 API: + - Updated DateOffset strings to pandas 2.2.1 versions. + - As part of this transition, we have a set of transitional API and test bugs: + - SNOW-1320623, SNOW-1321196 - pandas `df.loc` and `__setitem__` have buggy behavior when: + - the column key has duplicates in a specific manner (https://github.com/pandas-dev/pandas/issues/58317), or + - a new row and column are used in the row and column keys (https://github.com/pandas-dev/pandas/issues/58316). + Snowpark pandas deviates from this behavior and will maintain the same behavior as pandas from versions 2.1.x. + - SNOW-1320660 - `qcut` / `cut` with bin preparation is temporarily NotImplemented due to upstream changes. + - SNOW-1321662 - `merge` fails when join is outer and sort is False. + - SNOW-1321682 - `df.melt` w/ duplicated cols. + - SNOW-1318223 - `series.py::_flex_method` list-like other (`pd.Index`) may not be supported in pandas now. + - SNOW-1321719 - `test_bitwise_operators.py` xfails. +- Changed the dtype of the index of empty `DataFrame` and `Series` to be `int64` rather than `object` to match the behavior of pandas. +- Changed the import path of Snowpark pandas package to use Modin 0.28.1 instead. The new recommended import statement is `import modin.pandas as pd; import snowflake.snowpark.modin.plugin`. + +### New Features +- Added support for `axis` argument for `df.where` and `df.mask` when `other` is a Series. +- Added back `_repr_html_` to DataFrame class for pretty printing (partially reverts commit 576ba26586caca3fa063da1fed465c61091b6d9c). +- Added support for `DataFrameGroupBy.nunique`. + +## 1.14.0a1 (2024-04-11) + +### Behavior Changes +- The following API changes are made to align Snowpark pandas with the pandas 2.1.4 API: + - Removed `errors` and `try_cast` arguments from `DataFrame`/`Series.where` and `mask`. + - Added the `dtype_backend` argument to `DataFrame`/`Series.convert_dtypes`; this argument is ignored by Snowpark pandas and only exists for compatibility. 
+ - Removed `is_copy` from `DataFrame`/`Series.take`. + - Removed `squeeze` argument from `DataFrame`/`Series.groupby`. Changed the default value of `group_keys` to `True`, and `observed` to `no_default`. + - Limited the length of generated labels and identifiers to 32 characters + - Removed the `squeeze`, `prefix`, `mangle_dupe_cols`, `error_bad_lines`, and `warn_bad_lines` arguments from `pd.read_csv`. These were previously unsupported by Snowpark pandas, and existed only for compatibility. + - Renamed the `skip_initial_space` argument in `pd.read_csv` to `skipinitialspace`; it remains unsupported and will raise an error if specified. + - Added the `date_format` and `dtype_backend` arguments in `pd.read_csv`. These are currently unsupported and added only for compatibility. `dtype_backend` is ignored and will raise a warning if provided, and `date_format` will raise an error. + - Added the `dtype_backend`, `filesystem`, and `filters` arguments in `pd.read_parquet`. These are currently unsupported and added only for compatibility. `dtype_backend` is ignored and will raise a warning if provided, and `filesystem` and `filters` will raise an error. + - Removed the `numpy` argument from `pd.read_json`. This was previously unsupported, and existed only for compatibility. + - Added the `dtype_backend` and `engine` arguments to `pd.read_json`. These are currently unsupported and added only for compatibility; they are ignored and will raise a warning if provided. + +- The following methods are removed: + - `DataFrame`/`Series.append` + - `Series.is_monotonic` + +### New Features +- Added support for `pd.cut` with `retbins=False` and `labels=False`. +- Added support for `Series.str.strip`. +- Added support for `Series.str.len`. +- Added support for `Series.str.capitalize`. +- Added support for `DataFrame.apply` and `Series.apply` to work with `@udf` decorated functions to allow working with package dependencies. +- Added support for `DataFrameGroupBy.transform`. +- Added support for `DataFrame.idxmax`, `DataFrame.idxmin`, `Series.idxmax`, and `Series.idxmin`. +- Added support for `Series.str.replace`. +- Added support for `Series.str.split`. +- Added support for `Series.str.title` and `Series.str.istitle`. +- Added support for `np.where`, `np.logical_*`, and `np.add` operators via `__array_ufunc__` and `__array_function__`. +- Added support for `DataFrameGroupby.head` and `DataFrameGroupBy.tail`. +- Added support for `DataFrameGroupBy.idxmax` and `DataFrameGroupBy.idxmin` for `GroupBy` `axis = 0`. +- Updated to `snowpark-python` v1.14.0. +- Updated to `pandas` 2.2.1 from 2.1.4. +- Added support for `axis` argument for `df.where` and `df.mask` when `other` is a Series. + +### Bug Fixes +- Fixed broadcast when masking a DataFrame with a Series using `df.where` or `df.mask`. +- Error out when scalar is passed for condition to DataFrame/Series `where` or `mask`. +- Fixed property docstring generation for some classes that use the telemetry metaclass. +- Fixed an issue where creating a Snowpark pandas DataFrame from a Series with a tuple `name`, such as `pd.DataFrame(pd.Series(name=("A", 1)))`, did not create `MultiIndex` columns on the resulting frame. +- Added custom docstrings inplace to avoid module reload errors. +- Added a separate docstring class for BasePandasDataset. +- Fixed docstring overrides for subclasses. + +## 1.13.0a1 (2024-03-15) +### Dependency Updates +- Upgraded `pandas` from 1.5.3 to 2.1.4. 
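+
+A minimal, hypothetical sketch of a few of the `Series.str` and `pd.cut` additions listed under 1.14.0a1 above, using the import statement recommended in the 1.14.0a2 entry; the data is made up, and an active Snowpark session (see the plugin README) is assumed:
+
+```python
+import modin.pandas as pd
+import snowflake.snowpark.modin.plugin  # noqa: F401  (activates the Snowpark pandas plugin)
+
+s = pd.Series([" ripe apple ", "green PEAR", None])
+s.str.strip()        # trims leading/trailing whitespace
+s.str.len()          # element-wise length; missing values stay missing
+s.str.capitalize()   # e.g. "Green pear" for the second element
+
+ages = pd.Series([2, 17, 35, 62])
+pd.cut(ages, bins=[0, 18, 65, 100], labels=False)  # bin positions 0, 0, 1, 1
+```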
+ +### Behavior Changes +- Removed support for Python 3.8, as pandas 2.1.4 no longer supports this version. Please upgrade your environment to use Python 3.9 or newer. +- The following API changes are made as a result of moving from pandas 1.5.3 to 2.1.4: + - Removed sized index types like `pd.Int64Index` and `pd.Float64Index`. Index objects are now explicitly constructed with a dtype parameter. + - Changed the default dtype of an empty Series from `float64` to `object`. + - Changed the default value of `numeric_only` to `False` for many operations. Previously, setting `numeric_only=None` would automatically drop non-numeric columns from a frame if possible; this behavior has been removed, and setting `numeric_only=None` gives the same behavior as `numeric_only=False`. + - Removed the `level` parameter from aggregation functions (`sum`, `prod`, `count`, `any`, etc.). + - Removed `Series.append`. Use `pd.concat` instead. + - Removed the `inplace` parameter for `set_axis`. + - Removed the `datetime_is_numeric` parameter for `describe`. All datetime data is now treated as numeric. + - Removed the `loffset` and `base` parameters for `resample` and `Grouper`. Use `offset` and `origin` instead. + - Added a name to the output of `value_counts`. The returned series will be named `count` when `normalize=False`, and `proportion` when `normalize=True`. +- The following errors have changed as a result of moving from pandas 1.5.3 to 2.1.4: + - When attempting to call `DataFrame.aggregate` with a dict where a label has no associated functions (e.g. `df.aggregate({0: []})`), the error message has changed from "no result" to "No objects to concatenate." + - Calling aggregation methods with `numeric_only=True` on non-numeric `Series` objects raises a `TypeError`. + - Calling `DataFrame.aggregate` or `Series.aggregate` with a list of aggregations will not raise an error when invalid keyword arguments are passed. For example, `df.agg(["count"], invalid=0)` will not raise an error in Snowpark pandas even though "invalid" is not a valid argument to the `count` aggregation. + - Calling `GroupBy.shift` with a non-integer value for the `periods` argument now always raises a `TypeError`. Previously, floating point values that happened to be integers (like `2.0` or `-2.0`) were valid. + - Stopped automatically creating a Snowpark session when there is no active Snowpark session. Instead, Snowpark pandas requires a unique active Snowpark session. + +### New Features +- Added `"quantile"` as a valid aggregation in `DataFrame.agg` and `Series.agg`. +- Added support for binary operations between `DataFrame`/`Series` and `Series`/`DataFrame` along `axis=1`. +- Added support for binary operations between a `Series` or `DataFrame` object and a list-like object for `axis=1`. +- Added support for `DataFrame.round` and `Series.round`. +- Added support for `df.melt` and `pd.melt` +- Added support for binary operations between two `DataFrame` objects. +- Added support for `DataFrame.sort_index` and `Series.sort_index` along `axis=0`. +- Added support for `DataFrame.skew` and `Series.skew` along `axis=0` +- Added support for reading `SELECT` SQL Queries into a `DataFrame` object via `pd.read_snowflake` and changed `name` argument of `pd.read_snowflake` to `name_or_query`. +- Added support for `Series.str.startswith` and `Series.str.endswith`. +- Added support for reading SQL Queries with CTEs and CTEs with anonymous stored procedures into a `DataFrame` object via `pd.read_snowflake`. 
+- Added support for `DataFrame.first_valid_index`, `DataFrame.last_valid_index`, `Series.first_valid_index`, and `Series.last_valid_index`. +- Added support for `DataFrame.ffill`, `DataFrame.pad`, `Series.ffill`, and, `Series.pad`. +- Added support for reading `CALL SQL` Queries into a `DataFrame` object via `pd.read_snowflake`. +- Added support for `Series.str.lower` and `Series.str.upper`. +- Added support for `Series.str.isdigit`, `Series.str.islower`, and `Series.str.isupper`. +- Added partial support for `DataFrameGroupBy.apply` on `axis=0`, for `func` returning a `DataFrame`. +- Added partial support for `DataFrameGroupBy.apply` on `axis=0`, for `func` returning an object that is neither a DataFrame nor a Series. +- Added support for `Series.groupby.cumcount`, `Series.groupby.cummax`, `Series.groupby.cummin`, and `Series.groupby.cumsum`. +- Added support for `DataFrame.groupby.cumcount`, `DataFrame.groupby.cummax`, `DataFrame.groupby.cummin`, and `DataFrame.groupby.cumsum`. +- Added support for `pd.qcut` with `retbins=False`. +- Added support for `Series.str.contains` and `Series.str.count`. +- Added partial support for `DataFrameGroupBy.apply` on `axis=0`, for `func` always returning a `Series` with the same index and same name. +- Added support for `DataFrameGroupBy.rank` and `SeriesGroupBy.rank`. + +### Bug Fixes +- Allowed getting the Snowpark pandas session before creating a Snowpark pandas Dataframe or Series. +- Fixed an issue when using `pd.read_snowflake` together with `apply(..., axis=1)` where the row position column could not be disambiguated. +- Fixed the exception that you get when accessing a missing attribute of the Snowpark pandas module. +- Using dataframe or series apply(axis=1) when there are multiple sessions no longer raises an exception. +- Added docstring and doctests to correctly reflect difference between Snowpark pandas and native pandas functionality for `get` method. + +### Improvements +- Improved performance for `DataFrame.apply` and `Series.apply` for `axis=1` for functions passed without type hints by micro-batching rows. +- Restructure Snowpark pandas documentation + +## 1.12.1a1 (2024-02-20) + +### New Features +- Added support for `DataFrame.cummin`, `DataFrame.cummax`, `DataFrame.cumsum`, `Series.cummin`, `Series.cummax`, and `Series.cumsum`. +- Added support for `groups` and `indices` properties of `groupby` object. +- Added support for `DataFrame.add_prefix`, `DataFrame.add_suffix`, `Series.add_prefix`, and `Series.add_suffix`. +- Added support for `DataFrame.rolling` and `Series.rolling` on `axis=0` with integer `window`, `min_periods>=1`, and `center` for aggregations `min`, `max`, `sum`, `mean`, `var`, and `std`. +- Added support for `DataFrame.rank` and `Series.rank` with `pct=True`. +- Added support for `pd.date_range`. +- Added support for the `fill_value` parameter in binary operations. +- Added support for `Dataframe.duplicated` and `Series.duplicated`. +- Added support for `Dataframe.drop_duplicates` and `Series.drop_duplicates`. +- Added support for binary operations between `DataFrame` and `Series` (and vice-versa). +- Added support for binary operations between a `Series` or `DataFrame` object and a list-like object for `axis=0`. + +### Behavior Changes +- Deprecated support for Python 3.8. A future release will upgrade the `pandas` version to 2.1.4, which no longer supports Python 3.8. Users should upgrade Python to 3.9 or later. + +### Improvements +- Added cleanup logic at interpreter shutdown to close all active sessions. 
+- Improved performance for `DataFrame.apply` for `axis=1` by relying on Snowflake vectorized UDFs instead of vectorized UDTFs together with dynamic pivot. + +### Bug Fixes +- Fixed bug for `loc` when the index is unordered and the key is a slice with reversed order. +- Fixed bug for `pd.get_dummies` when input has been sorted, or just read from Snowflake. + +## 1.12.0a1 (2024-02-02) + +### Improvements +- Enabled telemetry for several private methods, e.g., `__getitem__` and `__setitem__`. +- Removed `to_numeric` length check. +- Added parameter type validation for aggregation, includes numeric_only, skipna and min_count. +- Changed `to_pandas` to return decimal numbers as `float64` instead of `object` based on Snowpark 1.12 release. + +### Bug Fixes +- Fixed bug where `loc` get on multiindex prefix matching. +- Removed the `modin.pandas.Session` reference to the Snowpark Session class. +- Removed unnecessary coalescing of join keys for left, right and inner join/merge. + +### New Features +- Added support for `DataFrame.diff` and `Series.diff`. +- Added support for `DataFrame.groupby.shift` and `Series.groupby.shift` +- Added support for `DataFrame.quantile` and `Series.quantile` +- Added support for `min`, `max`, `count`, and `sum` aggregations with `axis=1`. +- Added support for `DataFrame.resample` and `Series.resample` for aggregations: `median`, `sum`, `std`, `var`, `count`. +- Added support for binary operations with `pd.DateOffset` where offset is treated as a timedelta. +- Added support for `DataFrame.fillna` where `value` is a dataframe or `Series.fillna` where `value` is a series or dict. +- Added support for `DataFrame.isin`. +- Added support for `pd.get_dummies` for DataFrames and Series if params `dummy_na`, `drop_first` and `dtype` take default values. +- Added support for `groupby` with `sum`, `DataFrame.sum`, and `Series.sum` for string-typed data. +- Added support for `DataFrame.select_dtypes`. +- Added support for partial string indexing for `DatetimeIndex`. +- Added support for `DataFrame.iterrows` and `DataFrame.itertuples`. +- Added support for `DataFrame.sample` and `Series.sample`. +- Added support for `DataFrame.shift` and `Series.shift` with `axis=0,1`, `fill_value` and `periods`. +- Added support for `DataFrame.rank` and `Series.rank`. +- Added support for `DataFrame.describe` and `Series.describe`. +- Added support for `DataFrame.replace` and `Series.replace`. + +### Bug Fixes +- Fixed bug when `apply` has been called multiple times. +- Fixed bug where `loc` with slice key on a single row dataframe or series. +- Fixed bug where `series.reset_index` triggers eager evaluation. + +## 1.11.1a1 (2023-12-21) + +### Improvements +- Improved performance of `transpose` by removing unnecessary count queries. +- Raised NotImplementedError where setting cell with list like values. +- Reduced the number of queries for `iloc` get with scalar row key +- Improved performance of `insert` by removing count query. +- Improved performance of displaying Dataframe/Series in notebook. As part of this improvement we also removed support for html representation for DataFrames. +- Enabled SQL simplifier. +- Started warning users about all fallbacks to pandas via stored procedures. + +### Bug Fixes +- Fixed bug when `setitem`/`loc` on empty frame returns wrong result. +- Fixed bug where telemetry message can be duplicated. 
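+
+An illustrative sketch of the rolling and resample support described in the 1.12.x entries above, using the import statement recommended in the 1.14.0a2 entry; the frame below is hypothetical, and an active Snowpark session is assumed:
+
+```python
+import modin.pandas as pd
+import snowflake.snowpark.modin.plugin  # noqa: F401  (activates the Snowpark pandas plugin)
+
+df = pd.DataFrame(
+    {"sales": [3, 1, 4, 1, 5, 9]},
+    index=pd.date_range("2024-01-01", periods=6, freq="h"),
+)
+df.rolling(window=3, min_periods=1).mean()  # axis=0 rolling mean (1.12.1a1)
+df.resample("D").sum()                      # daily totals, one of the 1.12.0a1 resample aggregations
+```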
+ +## 1.10.0a1 (2023-12-13) + +### New Features +- Added support for setting the Snowpark session for Snowpark pandas DataFrame/Series, via `snowflake.snowpark.modin.pandas.session`. +- Added support for `ngroups` on `groupby` object. +- Added support for `Series.set_axis()` and `DataFrame.set_axis()`. +- Added support for `Series.dt.month`, `Series.dt.year`, `Series.dt.day` and `Series.dt.quarter`. +- Added support for `DataFrame.transform` with string and callable parameters. +- Added support for `DataFrame.abs`, `Series.abs`, `DataFrame.__neg__` and `Series.__neg__`. +- Added support for `df.resample` and `ser.resample`. Supported resample bins are: `T`, `S`, `H`, and `D`. Supported aggregations are: `max`, `min`, and `mean`. +- Added support for `pd.read_parquet` using Snowflake `COPY INTO` SQL command. +- Added support for `pd.read_json` using Snowflake `COPY INTO` SQL command. +- Added support for `DataFrame.value_counts` and `Series.value_counts`. +- Added support for `DataFrame.all`, `Series.all`, `Dataframe.any` and `Series.any` for integer +- Added support for `Series.mask()` and `DataFrame.mask()`. +- Added support for `ffill` on `df.resample`. +- Added support for `method` parameter of `DataFrame.fillna()` and `Series.fillna()`. + +### Improvements +- Updated with changes from snowpark-python 1.8.0 release. +- Rewrote and improved `.iloc` get using single query with lazy evaluation. +- Improved warning messages from `.to_datetime`. +- Improved `.to_datetime` to avoid unnecessary eager evaluation. +- Improved performance for fallback execution, i.e., running unsupported pandas APIs using stored procedures. +- Rewrote and improved `.loc` get using single query with lazy evaluation. +- Rewrote and improved `.loc` set using single query with lazy evaluation. +- Changed the implementation of `pd.read_csv` to use Snowflake `COPY INTO` SQL command instead of locally executing pandas `read_csv`. +- Improved performance of groupby by removing unnecessary count queries. +- Raise NotImplementedError for pivot_table when no index configured. +- memory_usage() will not return an error, but it will return '0' for all values. +- Rewrote and improved `__getitem__` using single query with lazy evaluation. +- Rewrote and improved `__setitem__` using single query with lazy evaluation. +- Improved performance of aggregate functions by reducing query count. + +### Bug Fixes +- Fixed a bug where binary operations between series with duplicate index values produces wrong result. +- Fixed a bug for `fillna` where the fill value is not supposed to be applied to index columns, and also stay consistent with Snowflake type system without explicit casting to variant. +- Fixed a bug where non-homogenous columns or indices were not converted correctly in `to_pandas`. + +### Changes +- Error out when unsupported aggregation function is used. + +## 1.7.0a4 (2023-10-10) +- Improved warning messages from `.to_datetime` + +### New Features +- Added support for `DataFrame.to_dict` and `series.to_dict`. +- Added support for `DataFrame.take` and `series.take`. +- Added support for `pd.Series.isin` + +### Improvements +- Rewrote and improved `.iloc` get with series key using single join query with lazy evaluation. +- Updated docstring for `DataFrame.sort_values` and `Series.sort_value` APIs. +- Updated docstring for `DataFrame.reset_index` and `Series.reset_index` APIs. +- Removed unnecessary client side check and fallback for aggregation. 
+
+### Bug Fixes
+- Fixed a bug in how `.loc` and `.iloc` handle column indexers.
+
+## 1.7.0a3 (2023-10-04)
+
+### New Features
+- Added support for `Series.dt.date`, `Series.dt.hour`, `Series.dt.minute` and `Series.dt.second`.
+
+### Bug Fixes
+- Fixed a bug where `DataFrame.dropna` used the original row position as new row positions after rows were dropped.
+- Fixed a bug where `.loc` used a string as the column key.
+- Fixed a bug where `.iloc` pulled the series key's index to the client.
+- Fixed a bug where `DataFrame.join` called `to_pandas()` unexpectedly.
+- Fixed a bug where some unsupported APIs did not raise `NotImplementedError`.
+- Fixed a bug where the binary operations `pow`, `rpow`, `__and__`, `__rand__`, `__or__`, `__ror__`, `__xor__`, and `__rxor__` called the frontend `default_to_pandas`.
+- Fixed a bug where creating a DataFrame from a shared database failed.
+
+## 1.7.0a2 (2023-09-20)
+
+### New Features
+- Added support for `pd.read_csv` by reading CSV files on the client and then uploading the data to Snowflake.
+- Added support for binary arithmetic and comparison operators between series.
+- Added support for `pd.unique`.
+
+### Improvements
+- Improved performance for `head`, `tail`, `_repr_html_`, `loc`, `iloc`, `__getitem__`, `__setitem__`, and `__repr__`.
+- Improved API documentation for Snowpark pandas IO methods.
+- Improved error messages when using the Snowpark pandas API with multiple Snowpark sessions.
+- Improved type conversion performance (from string to datetime).
+
+### Bug Fixes
+- Fixed a bug where an extra temp table was incorrectly created while using `pd.read_snowflake` to read a regular Snowflake table.
+- Fixed a bug where `df.pivot_table` failed when the original dataframe was created from large local data.
+- Fixed a bug when creating a Snowpark pandas DataFrame/Series from local numpy data that is not json-serializable.
+- Fixed a bug where `df.apply`, `series.apply` and `df.applymap` incorrectly converted SQL nulls to JSON nulls in Snowflake Variant data.
+- Fixed a bug where aggregation functions with `groupby` did not work on decimal columns.
+- Fixed a bug where the output of `_repr_html_` and `__repr__` did not match pandas behavior.
+
+## 1.7.0a1 (2023-09-15)
+
+Start of Private Preview
diff --git a/src/snowflake/snowpark/modin/plugin/README.md b/src/snowflake/snowpark/modin/plugin/README.md
new file mode 100644
index 00000000000..88714962314
--- /dev/null
+++ b/src/snowflake/snowpark/modin/plugin/README.md
@@ -0,0 +1,108 @@
+## Developer setup for SnowPandas
+This guide is based on https://github.com/snowflakedb/snowpark-python/blob/main/CONTRIBUTING.md, but modified so that it is possible to develop on both SnowPandas and Snowpark Python in parallel.
+This is necessary due to version conflicts (e.g., the pandas/Arrow versions used).
+
+First, create a new environment for SnowPandas:
+
+```bash
+conda create --name snowpandas-dev python=3.9
+```
+
+Activate the environment via
+```bash
+conda activate snowpandas-dev
+```
+
+Then install all dependencies (from the Snowpark repository root):
+```bash
+python -m pip install -e ".[development, modin-development]"
+pip3 install psutil
+
+# for demo
+pip install jupyter
+pip install matplotlib seaborn
+```
+
+## Folder structure
+The following tree diagram shows the high-level structure of the SnowPandas module within Snowpark.
+```bash
+snowflake
+└── snowpark
+    └── modin
+        └── pandas
+            ├── frontend        ← pandas API frontend layer
+            │   └── dispatching ← additional patching for I/O
+            ├── translation     ← folder containing abstraction from
+            │                     frontend to DF-algebra and query
+            │                     compiler
+            │   ├── _internal       ← Snowflake specific internals
+            │   ├── default2pandas  ← SnowPandas fallback to default pandas implementation
+            │   └── compiler        ← query compiler, Modin -> Snowpark
+            │                         DF/SnowSQL compilation logic.
+            └── utils           ← util classes from Modin, logging, …
+
+```
+
+## Doctests for Modin
+Modin uses a decorator `_inherit_docstrings` to equip functions with the original pandas docstrings that may contain tests. By simply adding an import statement, these doctests can be run through the Modin shim.
+However, not all tests currently pass, either due to missing pandas functionality within Modin or formatting differences between expected and received output.
+
+For this reason, all Modin doctests have been deactivated within `src/conftest.py`. To activate them, simply comment out the `pytest_ignore_collect` function. Modin doctests can be run from the repo root via
+```bash
+pytest -rP src/snowflake/snowpark/modin/pandas --log-cli-level=INFO
+```
+
+## Configuration file for connecting to Snowflake
+The Snowflake Python Connector and the Snowpark Python API now support creating a connection/session from a configuration file.
+The Snowpark pandas API also offers the convenience of implicit session creation from a configuration file.
+This eliminates the need to explicitly create a Snowpark session in your code, allowing you to write your pandas code just as you normally would.
+To achieve this, you'll need to create a configuration file located at `~/.snowflake/connections.toml`.
+The contents of this configuration file should be as follows (following the [TOML](https://toml.io/en/) file format):
+
+```toml
+default_connection_name = "default"
+
+[default]
+account = ""
+user = ""
+password = ""
+role = ""
+database = ""
+schema = ""
+warehouse = ""
+```
+
+The value of `default_connection_name` points to a configuration inside the TOML file, which will be used as the default configuration.
+Note that the keys of a configuration (`account`, `user`) are the same as the keys of the connection parameters used in `tests/parameters.py`, and the values of a configuration should be double quoted.
+
+## Git setup
+To set up a development version of the Snowpark pandas API, run the following git commands:
+```
+git clone git@github.com:snowflakedb/snowpandas.git
+cd snowpandas
+git remote add upstream git@github.com:snowflakedb/snowpark-python.git
+git remote set-url --push upstream DISABLE
+
+# This should be the output when invoking the following command:
+# origin    git@github.com:snowflakedb/snowpandas.git (fetch)
+# origin    git@github.com:snowflakedb/snowpandas.git (push)
+# upstream  git@github.com:snowflakedb/snowpark-python.git (fetch)
+# upstream  DISABLE (push)
+git remote -v
+```
+### Branch
+- `pandas-main` is the local main branch which will have all changes for the Snowpark pandas API.
+
+### Incorporate changes from the `upstream` Snowpark Python repo
+Assuming you are on the `pandas-main` branch:
+```
+git checkout -b <your-branch-name>
+git fetch upstream
+git merge upstream/main
+git push
+```
+Submit a PR to merge your branch into the `pandas-main` branch. This should be done regularly, or whenever there are important changes from Snowpark.
+
+
+### Before PuPr
+When releasing the Snowpark pandas API, merging this branch into main via a PR should allow for a clean history.
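+
+## Example: implicit session usage
+The snippet below is a minimal, illustrative sketch of the implicit session creation described in the configuration-file section above. It assumes the `connections.toml` file is in place and that `read_snowflake` is available on the pandas namespace as described in the changelog; `MY_TABLE` is a placeholder for any table visible to the configured role.
+
+```python
+import modin.pandas as pd
+import snowflake.snowpark.modin.plugin  # noqa: F401  (activates the Snowpark pandas plugin)
+
+# No explicit session creation is needed; the session is built from
+# ~/.snowflake/connections.toml on first use.
+df = pd.read_snowflake("MY_TABLE")  # "MY_TABLE" is a placeholder table name
+print(df.head())
+```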
diff --git a/src/snowflake/snowpark/modin/plugin/__init__.py b/src/snowflake/snowpark/modin/plugin/__init__.py new file mode 100644 index 00000000000..076200e9543 --- /dev/null +++ b/src/snowflake/snowpark/modin/plugin/__init__.py @@ -0,0 +1,30 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +from packaging import version + +# We need this import here to prevent circular dependency issues, since snowflake.snowpark.modin.pandas +# currently imports some internal utilities from snowflake.snowpark.modin.plugin. Test cases will +# import snowflake.snowpark.modin.plugin before snowflake.snowpark.modin.pandas, so in order to prevent +# circular dependencies from manifesting, apparently snowflake.snowpark.modin.pandas needs to +# be imported first. +from snowflake.snowpark.modin import pandas # noqa: F401 +from snowflake.snowpark.modin.config import DocModule +from snowflake.snowpark.modin.plugin import docstrings + +DocModule.put(docstrings.__name__) + +install_msg = "Run `pip install snowflake-snowpark-python[modin]` to resolve." +try: + import modin +except ModuleNotFoundError: # pragma: no cover + raise ModuleNotFoundError( + "Modin is not installed. " + install_msg + ) # pragma: no cover + +supported_modin_version = "0.28.1" +if version.parse(modin.__version__) != version.parse(supported_modin_version): + raise ImportError( + "Installed Modin version is not supported. " + install_msg + ) # pragma: no cover diff --git a/src/snowflake/snowpark/modin/plugin/_internal/__init__.py b/src/snowflake/snowpark/modin/plugin/_internal/__init__.py new file mode 100644 index 00000000000..0fbef920926 --- /dev/null +++ b/src/snowflake/snowpark/modin/plugin/_internal/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# diff --git a/src/snowflake/snowpark/modin/plugin/_internal/aggregation_utils.py b/src/snowflake/snowpark/modin/plugin/_internal/aggregation_utils.py new file mode 100644 index 00000000000..eb54d15b794 --- /dev/null +++ b/src/snowflake/snowpark/modin/plugin/_internal/aggregation_utils.py @@ -0,0 +1,1073 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# +# +# This file contains utils functions used by aggregation functions. 
+# +import functools +from collections import defaultdict +from collections.abc import Hashable, Iterable +from functools import partial +from typing import Any, Callable, Literal, NamedTuple, Optional, Union + +import numpy as np +from pandas._typing import AggFuncType, AggFuncTypeBase +from pandas.core.dtypes.common import ( + is_dict_like, + is_list_like, + is_named_tuple, + is_numeric_dtype, + is_scalar, +) + +from snowflake.snowpark._internal.type_utils import ColumnOrName +from snowflake.snowpark.column import CaseExpr, Column as SnowparkColumn +from snowflake.snowpark.functions import ( + Column, + array_agg, + array_construct, + array_construct_compact, + array_contains, + array_flatten, + array_max, + array_min, + array_position, + builtin, + cast, + coalesce, + col, + count, + count_distinct, + get, + greatest, + iff, + is_null, + least, + listagg, + lit, + max as max_, + mean, + median, + min as min_, + parse_json, + skew, + stddev, + stddev_pop, + sum as sum_, + var_pop, + variance, + when, +) +from snowflake.snowpark.modin.plugin._internal.frame import InternalFrame +from snowflake.snowpark.modin.plugin._internal.ordered_dataframe import ( + OrderedDataFrame, + OrderingColumn, +) +from snowflake.snowpark.modin.plugin._internal.utils import ( + from_pandas_label, + pandas_lit, + to_pandas_label, +) +from snowflake.snowpark.modin.plugin._typing import PandasLabelToSnowflakeIdentifierPair +from snowflake.snowpark.types import ( + BooleanType, + DataType, + DoubleType, + IntegerType, + StringType, +) + +AGG_NAME_COL_LABEL = "AGG_FUNC_NAME" + + +def array_agg_keepna( + column_to_aggregate: ColumnOrName, ordering_columns: Iterable[OrderingColumn] +) -> Column: + """ + Aggregate a column, including nulls, into an array by the given ordering columns. + """ + # array_agg drops nulls, but we can use the solution [1] to work around + # that by turning each element `v` into the array `[v]`... + # except that we can't use array_construct(NULL) and instead have to use + # parse_json(lit("null")) per [2]. + # [1] https://stackoverflow.com/a/77422662 + # [2] https://github.com/snowflakedb/snowflake-connector-python/issues/1388#issuecomment-1371091831 + return array_flatten( + array_agg( + array_construct( + iff( + is_null(column_to_aggregate), + parse_json(lit("null")), + Column(column_to_aggregate), + ) + ) + ).within_group( + [ordering_column.snowpark_column for ordering_column in ordering_columns] + ) + ) + + +def column_quantile( + column: SnowparkColumn, + interpolation: Literal["linear", "lower", "higher", "midpoint", "nearest"], + q: float, +) -> SnowparkColumn: + assert interpolation in ( + "linear", + "nearest", + ), f"unsupported interpolation method '{interpolation}'" + # PERCENTILE_CONT interpolates between the nearest values if needed, while + # PERCENTILE_DISC finds the nearest value + agg_method = "percentile_cont" if interpolation == "linear" else "percentile_disc" + # PERCENTILE_* returns DECIMAL; we cast to DOUBLE + # example sql: SELECT CAST(PERCENTILE_COUNT(0.25) WITHIN GROUP(ORDER BY a) AS DOUBLE) AS a FROM table + return builtin(agg_method)(pandas_lit(q)).within_group(column).cast(DoubleType()) + + +def _columns_coalescing_idxmax_idxmin_helper( + *cols: SnowparkColumn, + axis: Literal[0, 1], + func: Literal["idxmax", "idxmin"], + keepna: bool, + pandas_column_labels: list, + is_groupby: bool = False, +) -> SnowparkColumn: + """ + Computes the index corresponding to the func for each row if axis=1 or column if axis=0. 
+ If all values in a row/column are NaN, then the result will be NaN. + + Parameters + ---------- + *cols: SnowparkColumn + A tuple of Snowpark Columns. + axis: {0, 1} + The axis to apply the func on. + func: {"idxmax", "idxmin"} + The function to apply. + keepna: bool + Whether to skip NaN Values. + pandas_column_labels: list + pandas index/column names. + + Returns + ------- + Callable + """ + if axis == 0: + extremum = max_(*cols) if func == "idxmax" else min_(*cols) + + # TODO SNOW-1316602: Support MultiIndex for DataFrame, Series, and DataFrameGroupBy cases. + if len(pandas_column_labels) > 1: + # The index is a MultiIndex, current logic does not support this. + raise NotImplementedError( + f"{func} is not yet supported when the index is a MultiIndex." + ) + + # TODO SNOW-1270521: max_by and min_by are not guaranteed to break tiebreaks deterministically + extremum_position = ( + get( + builtin("max_by")( + Column(pandas_column_labels[0]), + Column(*cols), + 1, + ), + 0, + ) + if func == "idxmax" + else get( + builtin("min_by")( + Column(pandas_column_labels[0]), + Column(*cols), + 1, + ), + 0, + ) + ) + + if is_groupby and keepna: + # When performing groupby, if a group has any NaN values in its column, the idxmax/idxmin of that column + # will always be NaN. Therefore, we need to check whether there are any NaN values in each group. + return iff( + builtin("count_if")(Column(*cols).is_null()) > 0, + pandas_lit(None), + extremum_position, + ) + else: + # if extremum is null, i.e. there are no columns or all columns are + # null, mark extremum_position as null, because our final expression has + # to evaluate to null. + return builtin("nvl2")(extremum, extremum_position, lit(None)) + + else: + column_array = array_construct(*cols) + # extremum is null if there are no columns or all columns are null. + # otherwise, extremum contains the extremal column, i.e. the max column for + # idxmax and the min column for idxmin. + extremum = (array_max if func == "idxmax" else array_min)(column_array) + # extremum_position is the position of the first column with a value equal + # to extremum. + extremum_position = array_position(extremum, column_array) + + if keepna: + # if any of the columns is null, mark extremum_position as null, + # because our final expression has to evaluate to null. That's how we + # "keep NA." + extremum_position = iff( + array_contains(lit(None), column_array), lit(None), extremum_position + ) + else: + # if extremum is null, i.e. there are no columns or all columns are + # null, mark extremum_position as null, because our final expression has + # to evalute to null. + extremum_position = builtin("nvl2")(extremum, extremum_position, lit(None)) + + # If extremum_position is null, return null. + return builtin("nvl2")( + extremum_position, + # otherwise, we create an array of all the column names using pandas_column_labels + # and get the element of that array that is at extremum_position. + get( + array_construct(*(lit(c) for c in pandas_column_labels)), + cast(extremum_position, "int"), + ), + lit(None), + ) + + +# Map between the pandas input aggregation function (str or numpy function) and +# the corresponding snowflake builtin aggregation function for axis=0. 
+SNOWFLAKE_BUILTIN_AGG_FUNC_MAP: dict[Union[str, Callable], Callable] = { + "count": count, + "mean": mean, + "min": min_, + "max": max_, + "idxmax": functools.partial( + _columns_coalescing_idxmax_idxmin_helper, func="idxmax" + ), + "idxmin": functools.partial( + _columns_coalescing_idxmax_idxmin_helper, func="idxmin" + ), + "sum": sum_, + "median": median, + "skew": skew, + "std": stddev, + "var": variance, + "booland_agg": builtin("booland_agg"), + "boolor_agg": builtin("boolor_agg"), + np.max: max_, + np.min: min_, + np.sum: sum_, + np.mean: mean, + np.median: median, + np.std: stddev, + np.var: variance, + "array_agg": array_agg, + "quantile": column_quantile, + "nunique": count_distinct, +} + + +class AggFuncInfo(NamedTuple): + """ + Information needed to distinguish between dummy and normal aggregate functions. + """ + + # The aggregate function + func: AggFuncTypeBase + + # If true, the aggregate function is applied to "NULL" rather than a column + is_dummy_agg: bool + + +def _columns_coalescing_min(*cols: SnowparkColumn) -> Callable: + """ + Computes the minimum value in each row, skipping NaN values. If all values in a row are NaN, + then the result will be NaN. + + Example SQL: + SELECT ARRAY_MIN(ARRAY_CONSTRUCT_COMPACT(a, b, c)) AS min + FROM VALUES (10, 1, NULL), (NULL, NULL, NULL) AS t (a, b, c); + + Result: + -------- + | min | + -------- + | 1 | + -------- + | NULL | + -------- + """ + return array_min(array_construct_compact(*cols)) + + +def _columns_coalescing_max(*cols: SnowparkColumn) -> Callable: + """ + Computes the maximum value in each row, skipping NaN values. If all values in a row are NaN, + then the result will be NaN. + + Example SQL: + SELECT ARRAY_MAX(ARRAY_CONSTRUCT_COMPACT(a, b, c)) AS max + FROM VALUES (10, 1, NULL), (NULL, NULL, NULL) AS t (a, b, c); + + Result: + -------- + | max | + -------- + | 10 | + -------- + | NULL | + -------- + """ + return array_max(array_construct_compact(*cols)) + + +def _columns_count(*cols: SnowparkColumn) -> Callable: + """ + Counts the number of non-NULL values in each row. + + Example SQL: + SELECT NVL2(a, 1, 0) + NVL2(b, 1, 0) + NVL2(c, 1, 0) AS count + FROM VALUES (10, 1, NULL), (NULL, NULL, NULL) AS t (a, b, c); + + Result: + --------- + | count | + --------- + | 2 | + --------- + | 0 | + --------- + """ + # IMPORTANT: count and sum use python builtin sum to invoke __add__ on each column rather than Snowpark + # sum_, since Snowpark sum_ gets the sum of all rows within a single column. + # NVL2(col, x, y) returns x if col is NULL, and y otherwise. + return sum(builtin("nvl2")(col, pandas_lit(1), pandas_lit(0)) for col in cols) + + +def _columns_coalescing_sum(*cols: SnowparkColumn) -> Callable: + """ + Sums all non-NaN elements in each row. If all elements are NaN, returns 0. + + Example SQL: + SELECT ZEROIFNULL(a) + ZEROIFNULL(b) + ZEROIFNULL(c) AS sum + FROM VALUES (10, 1, NULL), (NULL, NULL, NULL) AS t (a, b, c); + + Result: + ------- + | sum | + ------- + | 11 | + ------- + | 0 | + ------- + """ + # IMPORTANT: count and sum use python builtin sum to invoke __add__ on each column rather than Snowpark + # sum_, since Snowpark sum_ gets the sum of all rows within a single column. + return sum(builtin("zeroifnull")(col) for col in cols) + + +# Map between the pandas input aggregation function (str or numpy function) and +# the corresponding aggregation function for axis=1 when skipna=True. 
The returned aggregation +# function may either be a builtin aggregation function, or a function taking in *arg columns +# that then calls the appropriate builtin aggregations. +SNOWFLAKE_COLUMNS_AGG_FUNC_MAP: dict[Union[str, Callable], Callable] = { + "count": _columns_count, + "sum": _columns_coalescing_sum, + np.sum: _columns_coalescing_sum, + "min": _columns_coalescing_min, + "max": _columns_coalescing_max, + "idxmax": _columns_coalescing_idxmax_idxmin_helper, + "idxmin": _columns_coalescing_idxmax_idxmin_helper, + np.min: _columns_coalescing_min, + np.max: _columns_coalescing_max, +} + +# These functions are called instead if skipna=False +SNOWFLAKE_COLUMNS_KEEPNA_AGG_FUNC_MAP: dict[Union[str, Callable], Callable] = { + "min": least, + "max": greatest, + "idxmax": _columns_coalescing_idxmax_idxmin_helper, + "idxmin": _columns_coalescing_idxmax_idxmin_helper, + # IMPORTANT: count and sum use python builtin sum to invoke __add__ on each column rather than Snowpark + # sum_, since Snowpark sum_ gets the sum of all rows within a single column. + "sum": lambda *cols: sum(cols), + np.sum: lambda *cols: sum(cols), + np.min: least, + np.max: greatest, +} + + +class AggregateColumnOpParameters(NamedTuple): + """ + Parameters/Information needed to apply aggregation on a Snowpark column correctly. + """ + + # Snowflake quoted identifier for the column to apply aggregation on + snowflake_quoted_identifier: ColumnOrName + + # The Snowpark data type for the column to apply aggregation on + data_type: DataType + + # pandas label for the new column produced after aggregation + agg_pandas_label: Optional[Hashable] + + # Snowflake quoted identifier for the new Snowpark column produced after aggregation + agg_snowflake_quoted_identifier: str + + # the snowflake aggregation function to apply on the column + snowflake_agg_func: Callable + + # the columns specifying the order of rows in the column. This is only + # relevant for aggregations that depend on row order, e.g. summing a string + # column. + ordering_columns: Iterable[OrderingColumn] + + +def is_snowflake_agg_func(agg_func: AggFuncTypeBase) -> bool: + return agg_func in SNOWFLAKE_BUILTIN_AGG_FUNC_MAP + + +def get_snowflake_agg_func( + agg_func: AggFuncTypeBase, agg_kwargs: dict[str, Any], axis: int = 0 +) -> Optional[Callable]: + """ + Get the corresponding Snowflake/Snowpark aggregation function for the given aggregation function. + If no corresponding snowflake aggregation function can be found, return None. + """ + if axis == 0: + snowflake_agg_func = SNOWFLAKE_BUILTIN_AGG_FUNC_MAP.get(agg_func) + if snowflake_agg_func == stddev or snowflake_agg_func == variance: + # for aggregation function std and var, we only support ddof = 0 or ddof = 1. + # when ddof is 1, std is mapped to stddev, var is mapped to variance + # when ddof is 0, std is mapped to stddev_pop, var is mapped to var_pop + # TODO (SNOW-892532): support std/var for ddof that is not 0 or 1 + ddof = agg_kwargs.get("ddof", 1) + if ddof != 1 and ddof != 0: + return None + if ddof == 0: + return stddev_pop if snowflake_agg_func == stddev else var_pop + elif snowflake_agg_func == column_quantile: + interpolation = agg_kwargs.get("interpolation", "linear") + q = agg_kwargs.get("q", 0.5) + if interpolation not in ("linear", "nearest"): + return None + if not is_scalar(q): + # SNOW-1062878 Because list-like q would return multiple rows, calling quantile + # through the aggregate frontend in this manner is unsupported. 
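+                # For example, agg_kwargs = {"q": [0.25, 0.75]} would need one
+                # output row per requested quantile, which a single aggregated
+                # column cannot represent, so fall through to return None.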
+ return None + return lambda col: column_quantile(col, interpolation, q) + else: + snowflake_agg_func = SNOWFLAKE_COLUMNS_AGG_FUNC_MAP.get(agg_func) + + return snowflake_agg_func + + +def generate_rowwise_aggregation_function( + agg_func: AggFuncTypeBase, agg_kwargs: dict[str, Any] +) -> Optional[Callable]: + """ + Get a callable taking *arg columns to apply for an aggregation. + + Unlike get_snowflake_agg_func, this function may return a wrapped composition of + Snowflake builtin functions depending on the values of the specified kwargs. + """ + snowflake_agg_func = SNOWFLAKE_COLUMNS_AGG_FUNC_MAP.get(agg_func) + if not agg_kwargs.get("skipna", True): + snowflake_agg_func = SNOWFLAKE_COLUMNS_KEEPNA_AGG_FUNC_MAP.get( + agg_func, snowflake_agg_func + ) + min_count = agg_kwargs.get("min_count", 0) + if min_count > 0: + # Create a case statement to check if the number of non-null values exceeds min_count + # when min_count > 0, if the number of not NULL values is < min_count, return NULL. + def agg_func_wrapper(fn: Callable) -> Callable: + return lambda *cols: when( + _columns_count(*cols) < min_count, pandas_lit(None) + ).otherwise(fn(*cols)) + + return snowflake_agg_func and agg_func_wrapper(snowflake_agg_func) + return snowflake_agg_func + + +def is_supported_snowflake_agg_func( + agg_func: AggFuncTypeBase, agg_kwargs: dict[str, Any], axis: int +) -> bool: + """ + check if the aggregation function is supported with snowflake. Current supported + aggregation functions are the functions that can be mapped to snowflake builtin function. + + Args: + agg_func: str or Callable. the aggregation function to check + agg_kwargs: keyword argument passed for the aggregation function, such as ddof, min_count etc. + The value can be different for different aggregation functions. + Returns: + is_valid: bool. Whether it is valid to implement with snowflake or not. + """ + return get_snowflake_agg_func(agg_func, agg_kwargs, axis) is not None + + +def are_all_agg_funcs_supported_by_snowflake( + agg_funcs: list[AggFuncTypeBase], agg_kwargs: dict[str, Any], axis: int +) -> bool: + """ + Check if all aggregation functions in the given list are snowflake supported + aggregation functions. + + Returns: + True if all functions in the list are snowflake supported aggregation functions, otherwise, + return False. + """ + return all( + is_supported_snowflake_agg_func(func, agg_kwargs, axis) for func in agg_funcs + ) + + +def check_is_aggregation_supported_in_snowflake( + agg_func: AggFuncType, + agg_kwargs: dict[str, Any], + axis: int, +) -> bool: + """ + check if distributed implementation with snowflake is available for the aggregation + based on the input arguments. + + Args: + agg_func: the aggregation function to apply + agg_kwargs: keyword argument passed for the aggregation function, such as ddof, min_count etc. + The value can be different for different aggregation function. + Returns: + bool + Whether the aggregation operation can be executed with snowflake sql engine. + """ + # validate agg_func, only snowflake builtin agg function or dict of snowflake builtin agg + # function can be implemented in distributed way. 
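+    # For example (illustrative only): agg_func={"A": ["min", "max"], "B": "sum"}
+    # maps entirely to Snowflake builtin aggregations and is reported as supported,
+    # while a custom callable such as agg_func=lambda s: s.iloc[0] has no builtin
+    # mapping and is reported as unsupported.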
+ if is_dict_like(agg_func): + return all( + ( + are_all_agg_funcs_supported_by_snowflake(value, agg_kwargs, axis) + if is_list_like(value) + else is_supported_snowflake_agg_func(value, agg_kwargs, axis) + ) + for value in agg_func.values() + ) + elif is_list_like(agg_func): + return are_all_agg_funcs_supported_by_snowflake(agg_func, agg_kwargs, axis) + return is_supported_snowflake_agg_func(agg_func, agg_kwargs, axis) + + +def is_snowflake_numeric_type_required(snowflake_agg_func: Callable) -> bool: + """ + Is the given snowflake aggregation function needs to be applied on the numeric column. + """ + return snowflake_agg_func in [ + mean, + median, + skew, + sum_, + stddev, + stddev_pop, + variance, + var_pop, + column_quantile, + ] + + +def drop_non_numeric_data_columns( + query_compiler: "snowflake_query_compiler.SnowflakeQueryCompiler", # type: ignore[name-defined] # noqa: F821 + pandas_labels_for_columns_to_exclude: list[Hashable], +) -> "snowflake_query_compiler.SnowflakeQueryCompiler": # type: ignore[name-defined] # noqa: F821 + """ + Drop the data columns of the internal frame that are non-numeric if numeric_only is True. + + Args: + query_compiler: The query compiler for the internal frame to process on + pandas_labels_for_columns_to_exclude: List of pandas labels to exclude from dropping even if the + corresponding column is non-numeric. + Returns: + SnowflakeQueryCompiler that contains the processed new frame with non-numeric data columns dropped + """ + from snowflake.snowpark.modin.plugin.compiler.snowflake_query_compiler import ( + SnowflakeQueryCompiler, + ) + + original_frame = query_compiler._modin_frame + # get all data column to retain, a data column is retained if the pandas label for the column + data_column_to_retain: list[PandasLabelToSnowflakeIdentifierPair] = [ + PandasLabelToSnowflakeIdentifierPair( + original_frame.data_column_pandas_labels[i], + original_frame.data_column_snowflake_quoted_identifiers[i], + ) + for i, data_type in enumerate(query_compiler.dtypes.values) + if is_numeric_dtype(data_type) + or ( + original_frame.data_column_pandas_labels[i] + in pandas_labels_for_columns_to_exclude + ) + ] + + # get the original pandas labels and snowflake quoted identifiers for the numeric data columns + new_data_column_pandas_labels: list[Hashable] = [ + col.pandas_label for col in data_column_to_retain + ] + new_data_column_snowflake_quoted_identifiers: list[str] = [ + col.snowflake_quoted_identifier for col in data_column_to_retain + ] + + return SnowflakeQueryCompiler( + InternalFrame.create( + ordered_dataframe=original_frame.ordered_dataframe, + data_column_pandas_labels=new_data_column_pandas_labels, + data_column_snowflake_quoted_identifiers=new_data_column_snowflake_quoted_identifiers, + data_column_pandas_index_names=original_frame.data_column_pandas_index_names, + index_column_pandas_labels=original_frame.index_column_pandas_labels, + index_column_snowflake_quoted_identifiers=original_frame.index_column_snowflake_quoted_identifiers, + ) + ) + + +def generate_aggregation_column( + agg_column_op_params: AggregateColumnOpParameters, + agg_kwargs: dict[str, Any], + is_groupby_agg: bool, + index_column_snowflake_quoted_identifier: Optional[list[str]] = None, +) -> SnowparkColumn: + """ + Generate the aggregation column for the given column and aggregation function. + + Args: + agg_column_op_params: AggregateColumnOpParameters. 
The aggregation parameter for a Snowpark column, contains the following:
+            - snowflake_quoted_identifier: the snowflake quoted identifier for the column to apply aggregation on
+            - data_type: the Snowpark datatype for the column to apply aggregation on
+            - agg_snowflake_quoted_identifier: The snowflake quoted identifier used for the result column after aggregation
+            - snowflake_agg_func: The Snowflake aggregation function to apply on the given column
+            - ordering_columns: the list of snowflake quoted identifiers corresponding to the ordering columns
+        agg_kwargs: keyword arguments passed for the aggregation function, such as ddof, min_count etc.
+        is_groupby_agg: whether the aggregation function is applied after a groupby.
+        index_column_snowflake_quoted_identifier: The Snowflake quoted identifier corresponding to the index column
+
+    Returns:
+        SnowparkColumn after the aggregation function. The column is also aliased back to the original name.
+    """
+    snowpark_column = agg_column_op_params.snowflake_quoted_identifier
+    snowflake_agg_func = agg_column_op_params.snowflake_agg_func
+    if is_snowflake_numeric_type_required(snowflake_agg_func) and isinstance(
+        agg_column_op_params.data_type, BooleanType
+    ):
+        # if the column is a boolean column and the aggregation function requires numeric values,
+        # we cast the boolean column to integer (True mapped to 1, and False mapped to 0). This is
+        # to stay consistent with pandas behavior, where boolean type in pandas is treated as numeric type.
+        snowpark_column = cast(
+            agg_column_op_params.snowflake_quoted_identifier, IntegerType()
+        )
+
+    if snowflake_agg_func == sum_:
+        if isinstance(agg_column_op_params.data_type, StringType):
+            agg_snowpark_column = listagg(snowpark_column).within_group(
+                [
+                    ordering_column.snowpark_column
+                    for ordering_column in agg_column_op_params.ordering_columns
+                ]
+            )
+        else:
+            # There is a slightly different behavior for sum in terms of missing values in pandas and Snowflake:
+            # sum on a column with all NaN in pandas results in 0, but sum on a column with all NULL in Snowflake
+            # results in NULL. Therefore, we post-process the result with coalesce to replace a NULL result with 0.
+            agg_snowpark_column = coalesce(
+                snowflake_agg_func(snowpark_column), pandas_lit(0)
+            )
+    elif snowflake_agg_func in (
+        SNOWFLAKE_BUILTIN_AGG_FUNC_MAP["booland_agg"],
+        SNOWFLAKE_BUILTIN_AGG_FUNC_MAP["boolor_agg"],
+    ):
+        # Need to wrap column name in IDENTIFIER, or else bool agg function will treat the name as a string literal
+        agg_snowpark_column = snowflake_agg_func(builtin("identifier")(snowpark_column))
+    elif snowflake_agg_func == array_agg:
+        # Array aggregation requires the ordering columns, which we have to
+        # pass in here.
+        # note that we always assume keepna for array_agg. TODO(SNOW-1040398):
+        # make keepna treatment consistent across array_agg and other
+        # aggregation methods.
+ agg_snowpark_column = array_agg_keepna( + snowpark_column, ordering_columns=agg_column_op_params.ordering_columns + ) + elif ( + isinstance(snowflake_agg_func, partial) + and snowflake_agg_func.func == _columns_coalescing_idxmax_idxmin_helper + ): + agg_snowpark_column = _columns_coalescing_idxmax_idxmin_helper( + snowpark_column, + axis=0, + func=snowflake_agg_func.keywords["func"], + keepna=not agg_kwargs.get("skipna", True), + pandas_column_labels=index_column_snowflake_quoted_identifier, # type: ignore + is_groupby=is_groupby_agg, + ) + elif snowflake_agg_func == count_distinct: + if agg_kwargs.get("dropna", True) is False: + # count_distinct does only count distinct non-NULL values. + # Check if NULL is contained, then add +1 in this case. + if not isinstance(snowpark_column, SnowparkColumn): + snowpark_column = col(snowpark_column) + agg_snowpark_column = snowflake_agg_func(snowpark_column) + iff( + sum_(snowpark_column.is_null().cast(IntegerType())) > pandas_lit(0), + pandas_lit(1), + pandas_lit(0), + ) + else: + agg_snowpark_column = snowflake_agg_func(snowpark_column) + else: + agg_snowpark_column = snowflake_agg_func(snowpark_column) + + # Handle min_count and skipna parameters + min_count = -1 + skipna = True + is_groupby_min_max = is_groupby_agg and snowflake_agg_func in [min_, max_] + if snowflake_agg_func is sum_ or is_groupby_min_max: + # min_count parameter is only valid for groupby min/max/sum, dataframe sum and series sum + min_count = agg_kwargs.get("min_count", -1) + if not is_groupby_agg: + # skipna parameter is valid for all supported none-groupby aggregation function + skipna = agg_kwargs.get("skipna", True) + + if not skipna or min_count > 0: + case_expr: Optional[CaseExpr] = None + if not skipna: + # TODO(SNOW-1040398): Use a different aggregation function map for + # skipna=False, and set the skipna value at an earlier layer. + # when skipna is False, return NULL as far as there is NULL in the column. This is achieved by first + # converting the column to boolean with is_null, and call max on the boolean column. If NULL exists, + # the result of max will be True, otherwise, False. + # For example: [1, NULL, 2, 3] will be [False, True, False, False] with is_null, and max on the boolean + # result is True. + case_expr = when( + max_(is_null(agg_column_op_params.snowflake_quoted_identifier)), + pandas_lit(None), + ) + if min_count > 0: + # when min_count > 0, if the number of not NULL values is < min_count, return NULL. + min_count_cond = ( + count(agg_column_op_params.snowflake_quoted_identifier) < min_count + ) + case_expr = ( + case_expr.when(min_count_cond, pandas_lit(None)) + if (case_expr is not None) + else when(min_count_cond, pandas_lit(None)) + ) + + assert ( + case_expr is not None + ), f"No case expression is constructed with skipna({skipna}), min_count({min_count})" + agg_snowpark_column = case_expr.otherwise(agg_snowpark_column) + + # rename the column to agg_column_quoted_identifier + agg_snowpark_column = agg_snowpark_column.as_( + agg_column_op_params.agg_snowflake_quoted_identifier + ) + + return agg_snowpark_column + + +def aggregate_with_ordered_dataframe( + ordered_dataframe: OrderedDataFrame, + agg_col_ops: list[AggregateColumnOpParameters], + agg_kwargs: dict[str, Any], + groupby_columns: Optional[list[str]] = None, + index_column_snowflake_quoted_identifier: Optional[list[str]] = None, +) -> OrderedDataFrame: + """ + Perform aggregation on the snowpark dataframe based on the given column to aggregation function map. 
+ + Args: + ordered_dataframe: a OrderedDataFrame to perform aggregation on + agg_col_ops: mapping between the columns to apply aggregation on and the corresponding aggregation to apply + agg_kwargs: keyword argument passed for the aggregation function, such as ddof, min_count etc. + groupby_columns: If provided, groupby the dataframe with the given columns before apply aggregate. Otherwise, + no groupby will be performed. + index_column_snowflake_quoted_identifier: The Snowflake quoted identifier corresponding to the index column + + Returns: + OrderedDataFrame with all aggregated columns. + """ + + is_groupby_agg = groupby_columns is not None + agg_list: list[SnowparkColumn] = [ + generate_aggregation_column( + agg_column_op_params=agg_col_op, + agg_kwargs=agg_kwargs, + is_groupby_agg=is_groupby_agg, + index_column_snowflake_quoted_identifier=index_column_snowflake_quoted_identifier, + ) + for agg_col_op in agg_col_ops + ] + + if is_groupby_agg: + agg_ordered_dataframe = ordered_dataframe.group_by(groupby_columns, *agg_list) + else: + agg_ordered_dataframe = ordered_dataframe.agg(*agg_list) + return agg_ordered_dataframe + + +def convert_agg_func_arg_to_col_agg_func_map( + internal_frame: InternalFrame, + agg_func: AggFuncType, + pandas_labels_for_columns_to_exclude_when_agg_on_all: list[Hashable], +) -> dict[ + PandasLabelToSnowflakeIdentifierPair, Union[AggFuncTypeBase, list[AggFuncTypeBase]] +]: + """ + Convert the agg_func arguments to column to aggregation function maps, which is a map between + the Snowpark pandas column (represented as a PandasLabelToSnowflakeIdentifierPair) to the corresponding + aggregation functions needs to be applied on this column. Following rules are applied: + 1) If agg_func is a base aggregation (str or callable) or a list of base aggregation function, then all + aggregation functions are applied on each data column of the internal frame. + 2) If agg_func is already in a dict format (column label to aggregation functions map), only the columns + occur in the dictionary key is considered for aggregation. + + Args: + internal_frame: InternalFrame. The internal frame to apply aggregation on + agg_func: AggFuncType (str or callable, or a list of str or callable, or a dict between label and str or callable or list of str or callable) + The aggregations functions to apply on the internal frame. + pandas_labels_for_columns_to_exclude_when_agg_on_all: List[Hashable] + List of pandas labels for the columns to exclude from aggregation when the aggregation needs to be applied on + all data columns, which is the case when rule 1) described above is applied. + + Returns: + Dict[PandasLabelToSnowflakeIdentifierPair, Union[AggFuncTypeBase, List[AggFuncTypeBase]]] + Map between Snowpandas column and the aggregation functions needs to be applied on the column + """ + col_agg_func_map: dict[ + PandasLabelToSnowflakeIdentifierPair, + Union[AggFuncTypeBase, list[AggFuncTypeBase]], + ] = {} + + if is_dict_like(agg_func): + for label, fn in agg_func.items(): + # for each column configured in the map, look for the corresponding columns + col_quoted_identifiers = ( + internal_frame.get_snowflake_quoted_identifiers_group_by_pandas_labels( + [label], + include_index=False, + ) + )[0] + + for quoted_identifier in col_quoted_identifiers: + col_agg_func_map[ + PandasLabelToSnowflakeIdentifierPair(label, quoted_identifier) + ] = fn + else: + # if the aggregation function is str or callable or a list of str or callable, apply the aggregations + # functions on each data column. 
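+        # Illustrative example (identifiers shown schematically): for a frame with
+        # data columns A and B and agg_func="max", this branch produces
+        # {("A", '"A"'): "max", ("B", '"B"'): "max"}, skipping any label listed in
+        # pandas_labels_for_columns_to_exclude_when_agg_on_all.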
+        for label, quoted_identifier in zip(
+            internal_frame.data_column_pandas_labels,
+            internal_frame.data_column_snowflake_quoted_identifiers,
+        ):
+            if label not in pandas_labels_for_columns_to_exclude_when_agg_on_all:
+                col_agg_func_map[
+                    PandasLabelToSnowflakeIdentifierPair(label, quoted_identifier)
+                ] = agg_func
+
+    return col_agg_func_map
+
+
+def get_agg_func_to_col_map(
+    col_to_agg_func_map: dict[
+        PandasLabelToSnowflakeIdentifierPair,
+        Union[AggFuncTypeBase, list[AggFuncTypeBase]],
+    ]
+) -> dict[AggFuncTypeBase, list[PandasLabelToSnowflakeIdentifierPair]]:
+    """
+    Convert the column-to-aggregation-function map into an aggregation-function-to-columns map, keeping the
+    order of occurrence in the original map.
+
+    For example:
+        Given col_to_agg_func_map {(col1, "col1") : ["min", "max"], (col2, "col2"): ["max", "sum"]}
+        The aggregation func to columns map is {"min": [(col1, "col1")], "max": [(col1, "col1"), (col2, "col2")], "sum": [(col2, "col2")]}
+    """
+    agg_func_to_col_map: dict[
+        AggFuncTypeBase, list[PandasLabelToSnowflakeIdentifierPair]
+    ] = defaultdict(list)
+    for column_identifier, agg_funcs in col_to_agg_func_map.items():
+        # iterate over each aggregation function
+        agg_funcs_list = agg_funcs if is_list_like(agg_funcs) else [agg_funcs]
+        for agg_func in agg_funcs_list:
+            agg_func_to_col_map[agg_func].append(column_identifier)
+
+    return agg_func_to_col_map
+
+
+def get_pandas_aggr_func_name(aggfunc: AggFuncTypeBase) -> str:
+    """
+    Returns the friendly name for the aggregation function: if it is a callable, return its __name__;
+    otherwise return the string value itself.
+    """
+    return (
+        getattr(aggfunc, "__name__", str(aggfunc))
+        if not isinstance(aggfunc, str)
+        else aggfunc
+    )
+
+
+def generate_pandas_labels_for_agg_result_columns(
+    pandas_label: Hashable,
+    num_levels: int,
+    agg_func_list: list[AggFuncTypeBase],
+    include_agg_func_in_agg_label: bool,
+    include_pandas_label_in_agg_label: bool,
+) -> list[Hashable]:
+    """
+    Generate the pandas labels for the result columns after applying agg_func to the pandas column with the given
+    pandas label. One aggregation column will be produced for each aggregation function in the given list. If
+    include_agg_func_in_agg_label is true, the aggregation function name will be appended to the original pandas
+    label to produce the new pandas label, otherwise the original pandas label is used.
+    For example: Given pandas label 'A', and agg_func [min, max]
+        if include_agg_func_in_agg_label is False and include_pandas_label_in_agg_label is True, the result labels will be ['A', 'A']
+        if include_agg_func_in_agg_label is True and include_pandas_label_in_agg_label is True, the result labels will be [('A', 'min'), ('A', 'max')]
+        if include_agg_func_in_agg_label is True and include_pandas_label_in_agg_label is False, the result labels will be ['min', 'max']
+
+    Note that include_agg_func_in_agg_label and include_pandas_label_in_agg_label cannot both be False.
+
+    Args:
+        pandas_label: Hashable
+            The pandas label for the column to apply aggregation function on
+        num_levels: int
+            The number of levels for the pandas label
+        agg_func_list: List[AggFuncTypeBase]
+            List of aggregation functions to be applied on the pandas column
+        include_agg_func_in_agg_label: bool
+            Whether to include the aggregation function in the label for the aggregation result column
+        include_pandas_label_in_agg_label: bool
+            Whether to include the original pandas label in the label for the aggregation result column
+
+    Returns:
+        List[Hashable]
+            List of pandas labels for the result aggregation columns, the length is the same as agg_func_list.
+    """
+    assert (
+        include_pandas_label_in_agg_label or include_agg_func_in_agg_label
+    ), "the result aggregation label must contain at least the original label or the aggregation function name."
+    agg_func_column_labels = []
+    for agg_func in agg_func_list:
+        label_tuple = (
+            from_pandas_label(pandas_label, num_levels)
+            if include_pandas_label_in_agg_label
+            else ()
+        )
+        aggr_func_label = (
+            (get_pandas_aggr_func_name(agg_func),)
+            if include_agg_func_in_agg_label
+            else ()
+        )
+        label_tuple = label_tuple + aggr_func_label
+        agg_func_column_labels.append(to_pandas_label(label_tuple))
+
+    return agg_func_column_labels
+
+
+def generate_column_agg_info(
+    internal_frame: InternalFrame,
+    column_to_agg_func: dict[
+        PandasLabelToSnowflakeIdentifierPair,
+        Union[AggFuncInfo, list[AggFuncInfo]],
+    ],
+    agg_kwargs: dict[str, Any],
+    include_agg_func_only_in_result_label: bool,
+) -> tuple[list[AggregateColumnOpParameters], list[Hashable]]:
+    """
+    Generate the ColumnAggregationInfo for the internal frame based on the column_to_agg_func map.
+
+    Args:
+        internal_frame: InternalFrame
+            The internal frame to apply aggregation on
+        column_to_agg_func: Dict[PandasLabelToSnowflakeIdentifierPair, Union[AggFuncInfo, List[AggFuncInfo]]],
+            Map between the Snowpark pandas column to apply aggregation on and the aggregation functions to apply
+            for the column. The Snowpark pandas column is represented as a pair of the pandas label and the quoted
+            identifier for the columns. The aggregation function can be marked as dummy. In this case, it will be
+            applied to "NULL" rather than the column.
+        agg_kwargs: Dict[str, Any]
+            keyword arguments passed for the aggregation function
+        include_agg_func_only_in_result_label: bool
+            whether the result label should contain only the aggregation function name when the function name is
+            included in the result label.
+
+    Returns:
+        List[AggregateColumnOpParameters]
+            Each AggregateColumnOpParameters contains information of the quoted identifier for the column to apply
+            aggregation on, the snowflake aggregation function to apply on the column, and the quoted identifier
+            and pandas label to use for the result aggregation column.
+ List[Hashable] + The new index data column index names for the dataframe after aggregation + """ + + quoted_identifier_to_snowflake_type: dict[ + str, DataType + ] = internal_frame.quoted_identifier_to_snowflake_type() + num_levels: int = internal_frame.num_index_levels(axis=1) + # reserve all index column name and ordering column names + identifiers_to_exclude: list[str] = ( + internal_frame.index_column_snowflake_quoted_identifiers + + internal_frame.ordering_column_snowflake_quoted_identifiers + ) + column_agg_ops: list[AggregateColumnOpParameters] = [] + # if any value in the dictionary is a list, the aggregation function name is added as + # an extra level to the final pandas label, otherwise not. When any value in the dictionary is a list, + # the aggregation function name will be added as an extra level for the result label. + agg_func_level_included = any( + is_list_like(fn) and not is_named_tuple(fn) + for fn in column_to_agg_func.values() + ) + pandas_label_level_included = ( + not agg_func_level_included or not include_agg_func_only_in_result_label + ) + + for pandas_label_to_identifier, agg_func in column_to_agg_func.items(): + pandas_label, quoted_identifier = pandas_label_to_identifier + agg_func_list = ( + [agg_func] + if not is_list_like(agg_func) or is_named_tuple(agg_func) + else agg_func + ) + # generate the pandas label and quoted identifier for the result aggregation columns, one + # for each aggregation function to apply. + agg_col_labels = generate_pandas_labels_for_agg_result_columns( + pandas_label_to_identifier.pandas_label, + num_levels, + [func for (func, _) in agg_func_list], + agg_func_level_included, + pandas_label_level_included, + ) + agg_col_identifiers = ( + internal_frame.ordered_dataframe.generate_snowflake_quoted_identifiers( + pandas_labels=agg_col_labels, excluded=identifiers_to_exclude + ) + ) + identifiers_to_exclude += agg_col_identifiers + # construct the ColumnAggregationInfo for each aggregation + for func_info, label, identifier in zip( + agg_func_list, agg_col_labels, agg_col_identifiers + ): + (func, is_dummy_agg) = func_info + agg_func_col = pandas_lit(None) if is_dummy_agg else quoted_identifier + snowflake_agg_func = get_snowflake_agg_func(func, agg_kwargs, axis=0) + # once reach here, we require all func have a corresponding snowflake aggregation function. + # check_is_aggregation_supported_in_snowflake can be used to help performing the check. + assert ( + snowflake_agg_func + ), f"no snowflake aggregation function found for {func}" + column_agg_ops.append( + AggregateColumnOpParameters( + snowflake_quoted_identifier=agg_func_col, + data_type=quoted_identifier_to_snowflake_type[quoted_identifier], + agg_pandas_label=label, + agg_snowflake_quoted_identifier=identifier, + snowflake_agg_func=snowflake_agg_func, + ordering_columns=internal_frame.ordering_columns, + ) + ) + + new_data_column_index_names: list[Hashable] = [] + if pandas_label_level_included: + new_data_column_index_names += internal_frame.data_column_pandas_index_names + if agg_func_level_included: + new_data_column_index_names += [None] + + return column_agg_ops, new_data_column_index_names diff --git a/src/snowflake/snowpark/modin/plugin/_internal/apply_utils.py b/src/snowflake/snowpark/modin/plugin/_internal/apply_utils.py new file mode 100644 index 00000000000..cd35deeeb11 --- /dev/null +++ b/src/snowflake/snowpark/modin/plugin/_internal/apply_utils.py @@ -0,0 +1,1258 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. 
+# +import json +import sys +from collections import namedtuple +from collections.abc import Hashable +from enum import Enum, auto +from typing import Any, Callable, Literal, Optional, Union + +import cloudpickle +import numpy as np +import pandas as native_pd +from pandas._typing import AggFuncType +from pandas.api.types import is_scalar + +from snowflake.snowpark._internal.type_utils import PYTHON_TO_SNOW_TYPE_MAPPINGS +from snowflake.snowpark._internal.udf_utils import get_types_from_type_hints +from snowflake.snowpark.column import Column as SnowparkColumn +from snowflake.snowpark.functions import builtin, col, dense_rank, udf, udtf +from snowflake.snowpark.modin.plugin._internal.frame import InternalFrame +from snowflake.snowpark.modin.plugin._internal.ordered_dataframe import ( + OrderedDataFrame, + OrderingColumn, +) +from snowflake.snowpark.modin.plugin._internal.utils import ( + TempObjectType, + parse_object_construct_snowflake_quoted_identifier_and_extract_pandas_label, + parse_snowflake_object_construct_identifier_to_map, +) +from snowflake.snowpark.modin.utils import MODIN_UNNAMED_SERIES_LABEL +from snowflake.snowpark.session import Session +from snowflake.snowpark.types import ( + ArrayType, + DataType, + IntegerType, + LongType, + MapType, + PandasDataFrameType, + PandasSeriesType, + StringType, + VariantType, +) +from snowflake.snowpark.udf import UserDefinedFunction +from snowflake.snowpark.udtf import UserDefinedTableFunction +from snowflake.snowpark.window import Window + +APPLY_LABEL_COLUMN_QUOTED_IDENTIFIER = '"LABEL"' +APPLY_VALUE_COLUMN_QUOTED_IDENTIFIER = '"VALUE"' +APPLY_ORIGINAL_ROW_POSITION_COLUMN_QUOTED_IDENTIFIER = '"ORIGINAL_ROW_POSITION"' +APPLY_ROW_POSITION_WITHIN_GROUP_COLUMN_QUOTED_IDENTIFIER = '"ROW_POSITION_WITHIN_GROUP"' +APPLY_FIRST_GROUP_KEY_OCCURRENCE_POSITION_QUOTED_IDENTIFIER = ( + '"FIRST_GROUP_KEY_OCCURRENCE_POSITION"' +) + +# Default partition size to use when applying a UDTF. A higher value results in less parallelism, less contention and higher batching. +DEFAULT_UDTF_PARTITION_SIZE = 1000 + +# Use the workaround described below to use functions that are attributes of +# this module in UDFs and UDTFs. Without this workaround, we can't pickle +# those functions. +# https://github.com/cloudpipe/cloudpickle?tab=readme-ov-file#overriding-pickles-serialization-mechanism-for-importable-constructs +cloudpickle.register_pickle_by_value(sys.modules[__name__]) + + +class GroupbyApplySortMethod(Enum): + """ + A rule for sorting the rows resulting from groupby.apply. + """ + + UNSET = auto() + + # order by order of the input row that each output row originated from. + ORIGINAL_ROW_ORDER = auto() + # order by 1) comparing the group keys to each other 2) resolving + # ties by the order within the result for each group. this is like + # "sort=True" for groupby aggregations. + GROUP_KEY_COMPARISON_ORDER = auto() + # order by 1) ordering by the order in which the group keys appear + # in the original frame 2) resolving ties by the order within the + # result for each group. this is like "sort=false" for groupby + # aggregations. 
+ GROUP_KEY_APPEARANCE_ORDER = auto() + + +def check_return_variant_and_get_return_type(func: Callable) -> tuple[bool, DataType]: + """Check whether the function returns a variant in Snowflake, and get its return type.""" + return_type, _ = get_types_from_type_hints(func, TempObjectType.FUNCTION) + if return_type is None or isinstance( + return_type, (VariantType, PandasSeriesType, PandasDataFrameType) + ): + # By default, we assume it is a series-to-series function + # However, vectorized UDF only allows returning one column + # We will convert the result series to a list, which will be + # returned as a Variant + return_variant = True + else: + return_variant = False + return return_variant, return_type + + +def create_udtf_for_apply_axis_1( + row_position_snowflake_quoted_identifier: str, + func: Union[Callable, UserDefinedFunction], + raw: bool, + result_type: Optional[Literal["expand", "reduce", "broadcast"]], + args: tuple, + column_index: native_pd.Index, + input_types: list[DataType], + session: Session, + **kwargs: Any, +) -> UserDefinedTableFunction: + """ + Creates a wrapper UDTF for `func` to produce narrow table results for row-wise `df.apply` (i.e., `axis=1`). + The UDTF produces 3 columns: row position column, label column and value column. + + The label column maintains a json string from a dict, which contains + a pandas label in the current series, and its occurrence. We need to + record the occurrence to deduplicate the duplicate labels so the later pivot + operation on the label column can create separate columns on duplicate labels. + The value column maintains the value of the result after applying `func`. + + Args: + row_position_snowflake_quoted_identifier: quoted identifier identifying the row position column passed into the UDTF. + func: The UDF to apply row-wise. + raw: pandas parameter controlling apply within the UDTF. + result_type: pandas parameter controlling apply within the UDTF. + args: pandas parameter controlling apply within the UDTF. + column_index: The columns of the callee DataFrame, i.e. df.columns as pd.Index object. + input_types: Snowpark column types of the input data columns. + **kwargs: pandas parameter controlling apply within the UDTF. + + Returns: + Snowpark vectorized UDTF producing 3 columns. + """ + + # If given as Snowpark function, extract packages. + udf_packages = [] + if isinstance(func, UserDefinedFunction): + # TODO: Cover will be achieved with SNOW-1261830. + udf_packages = func._packages # pragma: no cover + func = func.func # pragma: no cover + + class ApplyFunc: + def end_partition(self, df): # type: ignore[no-untyped-def] # pragma: no cover + # First column is row position, set as index. + df = df.set_index(df.columns[0]) + + df.columns = column_index + df = df.apply( + func, axis=1, raw=raw, result_type=result_type, args=args, **kwargs + ) + # When a dataframe is returned from `df.apply`, + # `func` is a series-to-series function, e.g., + # def func(row): + # result = row + 1 + # result.index.name = 'new_index_name' + # return result + # + # For example, the original dataframe is + # a b b + # 0 0 1 2 + # + # the result dataframe from `df.apply` is + # new_index_name a b b + # 0 1 2 3 + # After the transformation below, we will get a dataframe with two + # columns. Each row in the result represents the series result + # at a particular position. 
+ # "LABEL" "VALUE" + # 0 {"pos": 0, "0": "a", "names": ["new_index_name"]} 1 + # 1 {"pos": 1, "0": "b", "names": ["new_index_name"]} 2 + # 2 {"pos": 2, "0": "b", "names": ["new_index_name"]} 3 + # where: + # - `pos` indicates the position within the series. + # - The integer keys like "0" map from index level to the result's + # label at that level. In this case, the result only has one + # index level. + # - `names` contains the names of the result's index levels. + # - VALUE contains the result at this position. + if isinstance(df, native_pd.DataFrame): + result = [] + for row_position_index, series in df.iterrows(): + + for i, (label, value) in enumerate(series.items()): + # If this is a tuple then we store each component with a 0-based + # lookup. For example, (a,b,c) is stored as (0:a, 1:b, 2:c). + if isinstance(label, tuple): + obj_label = {k: v for k, v in enumerate(list(label))} + else: + obj_label = {0: label} + obj_label["names"] = series.index.names + obj_label["pos"] = i + result.append( + [ + row_position_index, + json.dumps(obj_label), + value, + ] + ) + # use object type so the result is json-serializable + result = native_pd.DataFrame( + result, columns=["__row__", "label", "value"], dtype=object + ) + # When a series is returned from `df.apply`, + # `func` is a series-to-scalar function, e.g., `np.sum` + # For example, the original dataframe is + # a b + # 0 1 2 + # and the result series from `df.apply` is + # 0 3 + # dtype: int64 + # After the transformation below, we will get a dataframe with two columns: + # "LABEL" "VALUE" + # 0 {'0': MODIN_UNNAMED_SERIES_LABEL} 3 + elif isinstance(df, native_pd.Series): + result = df.to_frame(name="value") + result.insert(0, "label", json.dumps({"0": MODIN_UNNAMED_SERIES_LABEL})) + result.reset_index(names="__row__", inplace=True) + else: + raise TypeError(f"Unsupported data type {df} from df.apply") + + result["value"] = ( + result["value"].apply(handle_missing_value_in_variant).astype(object) + ) + return result + + ApplyFunc.end_partition._sf_vectorized_input = native_pd.DataFrame # type: ignore[attr-defined] + + packages = list(session.get_packages().values()) + udf_packages + func_udtf = udtf( + ApplyFunc, + output_schema=PandasDataFrameType( + [LongType(), StringType(), VariantType()], + [ + row_position_snowflake_quoted_identifier, + APPLY_LABEL_COLUMN_QUOTED_IDENTIFIER, + APPLY_VALUE_COLUMN_QUOTED_IDENTIFIER, + ], + ), + input_types=[PandasDataFrameType([LongType()] + input_types)], + # We have to use the current pandas version to ensure the behavior consistency + packages=[native_pd] + packages, + session=session, + ) + + return func_udtf + + +def convert_groupby_apply_dataframe_result_to_standard_schema( + func_input_df: native_pd.DataFrame, + func_output_df: native_pd.DataFrame, + input_row_positions: native_pd.Series, + include_index_columns: bool, +) -> native_pd.DataFrame: # pragma: no cover: this function runs inside a UDTF, so coverage tools can't detect that we are testing it. + """ + Take the result of applying the user-provided function to a dataframe, and convert it to a dataframe with known schema that we can output from a vUDTF. + + Args: + func_input_df: The input to `func`, where `func` is the Python function + that the user originally passed to apply(). + func_output_df: The output of `func`. + input_row_positions: The original row positions of the rows that + func_input_df came from. + include_index_columns: Whether to include the result's index columns in + the output. 
+ + Returns: + A 5-column dataframe that represents the function result per the + description in create_udtf_for_groupby_apply. + + """ + result_rows = [] + result_index_names = func_output_df.index.names + is_transform = func_output_df.index.equals(func_input_df.index) + for row_number, (index_label, row) in enumerate(func_output_df.iterrows()): + output_row_number = input_row_positions.iloc[row_number] if is_transform else -1 + if include_index_columns: + if isinstance(index_label, tuple): + for k, v in enumerate(index_label): + result_rows.append( + [ + json.dumps({"index_pos": k, "name": result_index_names[k]}), + row_number, + v, + output_row_number, + ] + ) + else: + result_rows.append( + [ + json.dumps({"index_pos": 0, "name": result_index_names[0]}), + row_number, + index_label, + output_row_number, + ] + ) + for col_number, (label, value) in enumerate(row.items()): + obj_label: dict[Any, Any] = {} + if isinstance(label, tuple): + obj_label = {k: v for k, v in enumerate(list(label))} + else: + obj_label = {0: label} + obj_label["data_pos"] = col_number + obj_label["names"] = row.index.names + result_rows.append( + [ + json.dumps(obj_label), + row_number, + convert_numpy_int_result_to_int(value), + output_row_number, + ] + ) + # use object type so the result is json-serializable + result_df = native_pd.DataFrame( + result_rows, + columns=[ + "label", + "row_position_within_group", + "value", + "original_row_number", + ], + dtype=object, + ) + result_df["value"] = ( + result_df["value"].apply(handle_missing_value_in_variant).astype(object) + ) + result_df["first_position_for_group"] = input_row_positions.iloc[0] + return result_df + + +def create_groupby_transform_func( + func: Callable, by: str, level: Any, *args: Any, **kwargs: Any +) -> Callable: + """ + Helper function to create the groupby lambda required for DataFrameGroupBy.transform. + This is a workaround to prevent pickling DataFrame objects: the pickle module will + try to pickle all objects accessible to the function passed in. + + Args + ---- + func: The function to create the groupby lambda required for DataFrameGroupBy. + by: The column(s) to group by. + level: If the axis is a MultiIndex (hierarchical), group by a particular level or levels. + Do not specify both by and level. + args: Function's positional arguments. + kwargs: Function's keyword arguments. + + + Returns + ------- + A lambda function that can be used in place of func in groupby transform. + """ + # - `dropna` controls whether the NA values should be included as a group/be present + # in the group keys. Therefore, it must be False to ensure that no values are excluded. + # Setting `dropna=True` here raises the IndexError: "cannot do a non-empty take from an empty axes." + # This is because any dfs created from the NA group keys result in empty dfs to work with, + # which cannot be used with the `take` method. + # + # - `group_keys` controls whether the grouped column(s) are included in the index. + # - `sort` controls whether the group keys are sorted. + # - `as_index` controls whether the groupby object has group labels as the index. + + # The index of the result of any transform call is guaranteed to be the original + # index. Therefore, the groupby parameters group_keys, sort, and as_index do not + # affect the result of transform, and are not explicitly specified. 
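+    # Illustrative usage sketch (hypothetical names): the returned lambda simply
+    # re-groups whatever frame it receives and calls transform on it, e.g.
+    #   fn = create_groupby_transform_func(np.cumsum, by="A", level=None)
+    #   fn(df)  # equivalent to df.groupby(by="A", dropna=False).transform(np.cumsum)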
+ + return lambda df: ( + df.groupby(by=by, level=level, dropna=False).transform(func, *args, **kwargs) + ) + + +def create_udtf_for_groupby_apply( + func: Callable, + args: tuple, + kwargs: dict, + data_column_index: native_pd.Index, + index_column_names: list, + input_data_column_types: list[DataType], + input_index_column_types: list[DataType], + session: Session, +) -> UserDefinedTableFunction: + """ + Create a UDTF from the Python function for groupby.apply. + + The UDTF takes as input the following columns in the listed order: + 1. The original row position within the dataframe (not just within the group) + 2. All the index columns + 3. All the data columns + + The UDF returns as output the following columns in the listed order. There is + one row per result row and per result column. + 1. The label for the row or index level value. This is a json string of a dict + representing the label. + + For output rows representing data values, this looks like e.g. if the + data column ('a', 'int_col') is the 4th column, and the entire column + index has names ('l1', 'l2'): + {"data_pos": 4, "0": "a", "1": "int_col", "names": ["l1", "l2"]} + + Note that "names" is common across all data columns. + + For values of an index level, this looks like e.g. if the index level + 3 has name "level_3": + {"index_pos": 3, name: "level_3"} + 2. The row position of this result row within the group. + 3. The value of the index level or the data column at this row. + 4. For transforms, this gives the position of the input row that produced + this result row. We need this for transforms when group_keys=False + because we have to reindex the final result according to original row + position. If `func` is not a transform, this position is -1. + 5. The position of the first row from the input dataframe that fell into + this group. For example, if we are grouping by column "A", we divide + the input dataframe into groups where column A is equal to "a1", where + it's equal to "a2", etc. We then apply `func` to each group. If "a2" + first appears in row position 0, then all output rows resulting from the + "a2" group get a value of 0 for this column. If "a1" first appears in + row position 1, then all output rows resulting from the "a1" group get + a value of 1 for this column. e.g.: + + Input dataframe + --------------- + position A B + 0 a2 b0 + 1 a1 b1 + 2 a2 b2 + + + Input Groups + ------------ + + for group_key == a1: + + A B + a1 b1 + + for group_key == a2: + + A B + a1 b1 + + Output Groups + ------------- + + for group_key == a1: + + first_appearance_position other result columns... + 1 other result values.... + + for group_key == a2: + + first_appearance_position other result columns... + 0 other result values.... + 0 other result values.... + + Args + ---- + func: The function we need to apply to each group + args: Function's positional arguments + kwargs: Function's keyword arguments + data_column_index: Column labels for the input dataframe + index_column_names: Names of the input dataframe's index + input_data_column_types: Types of the input dataframe's data columns + input_index_column_types: Types of the input dataframe's index columns + + Returns + ------- + A UDTF that will apply the provided function to a group and return a + dataframe representing all of the data and metadata of the result. + """ + + class ApplyFunc: + def end_partition(self, df: native_pd.DataFrame): # type: ignore[no-untyped-def] # pragma: no cover: adding type hint causes an error when creating udtf. 
also, skip coverage for this function because coverage tools can't tell that we're executing this function because we execute it in a UDTF. + """ + Apply the user-provided function to the group represented by this partition. + + Args + ---- + df: The dataframe representing one group + + Returns + ------- + A dataframe representing the result of applying the user-provided + function to this group. + """ + # First column is row position, save it for later. + row_positions = df.iloc[:, 0] + df = df.iloc[:, 1:] + + df.set_index( + [f"ARG{i}" for i in range(2, len(index_column_names) + 2)], inplace=True + ) + df.index.names = index_column_names + df.columns = data_column_index + # Use infer_objects() because integer columns come as floats + # TODO: file snowpark bug about that. Asked about this here: + # https://github.com/snowflakedb/snowpandas/pull/823/files#r1507286892 + input_df = df.infer_objects() + func_result = func(input_df, *args, **kwargs) + if isinstance(func_result, native_pd.Series): + # If function returns series, we have to transpose the series + # and change its metadata a little bit, but after that we can + # continue largely as if the function has returned a dataframe. + # + # If the series has a 1-dimensional index, the series name + # becomes the name of the column index. For example, if + # `func` returned the series native_pd.Series([1], name='a'): + # + # 0 1 + # Name: a, dtype: int64 + # + # The result needs to use the dataframe + # pd.DataFrame([1], columns=pd.Index([0], name='a'): + # + # a 0 + # 0 1 + # + name = func_result.name + func_result.name = None + func_result_as_frame = func_result.to_frame().T + if func_result_as_frame.columns.nlevels == 1: + func_result_as_frame.columns.name = name + return convert_groupby_apply_dataframe_result_to_standard_schema( + input_df, + func_result_as_frame, + row_positions, + # We don't need to include any information + # about the index of `func_result_as_frame`. + # The series only has one index, and that + # index becomes the columns of + # `func_result_as_frame`. + include_index_columns=False, + ) + if isinstance(func_result, native_pd.DataFrame): + return convert_groupby_apply_dataframe_result_to_standard_schema( + input_df, func_result, row_positions, include_index_columns=True + ) + # At this point, we know the function result was not a DataFrame + # or Series + return native_pd.DataFrame( + { + "label": [ + json.dumps({"0": MODIN_UNNAMED_SERIES_LABEL, "data_pos": 0}) + ], + "row_position_within_group": [0], + "value": [convert_numpy_int_result_to_int(func_result)], + "original_row_number": [-1], + "first_position_for_group": [row_positions.iloc[0]], + }, + # use object dtype so result is JSON-serializable + dtype=object, + ) + + input_types = [ + # first input column is the integer row number. the row number integer + # becomes a float inside the UDTF due to SNOW-1184587 + LongType(), + # the next columns are the index columns... + *input_index_column_types, + # ...then come the data columns. 
+ *input_data_column_types, + ] + return udtf( + ApplyFunc, + output_schema=PandasDataFrameType( + [StringType(), IntegerType(), VariantType(), IntegerType(), IntegerType()], + [ + APPLY_LABEL_COLUMN_QUOTED_IDENTIFIER, + APPLY_ROW_POSITION_WITHIN_GROUP_COLUMN_QUOTED_IDENTIFIER, + APPLY_VALUE_COLUMN_QUOTED_IDENTIFIER, + APPLY_ORIGINAL_ROW_POSITION_COLUMN_QUOTED_IDENTIFIER, + APPLY_FIRST_GROUP_KEY_OCCURRENCE_POSITION_QUOTED_IDENTIFIER, + ], + ), + input_types=[PandasDataFrameType(col_types=input_types)], + # We have to specify the local pandas package so that the UDF's pandas + # behavior is consistent with client-side pandas behavior. + packages=[native_pd] + list(session.get_packages().values()), + session=session, + ) + + +def create_udf_for_series_apply( + func: Union[Callable, UserDefinedFunction], + return_type: DataType, + input_type: DataType, + na_action: Optional[Literal["ignore"]], + session: Session, + args: tuple[Any, ...], + **kwargs: Any, +) -> UserDefinedFunction: + """ + Creates Snowpark user defined function to use like a columnar expression from given func or existing Snowpark user defined function. + + Args: + func: a Python function or Snowpark user defined function. + return_type: return type of the function as Snowpark type. + input_type: input type of the function as Snowpark type. + na_action: if "ignore", use strict mode. + session: Snowpark session, should be identical with pd.session + args: positional arguments to pass to the UDF + **kwargs: keyword arguments to pass to the UDF + + Returns: + Snowpark user defined function. + """ + + # Start with session packages. + packages = list(session.get_packages().values()) + + # Snowpark function with annotations, extract underlying func to wrap. + if isinstance(func, UserDefinedFunction): + # Ensure return_type specified is identical. + assert func._return_type == return_type + + # Append packages from function. + packages += func._packages + + # Below the function func is wrapped again, extract here the underlying Python function. + func = func.func + + if isinstance(return_type, VariantType): + + def apply_func(x): # type: ignore[no-untyped-def] # pragma: no cover + result = [] + # When the return type is Variant, the return value must be json-serializable + # Calling tolist() convert np.int*, np.bool*, etc. (which is not + # json-serializable) to python native values + for e in x.apply(func, args=args, **kwargs).tolist(): + result.append(handle_missing_value_in_variant(e)) + return result + + else: + + def apply_func(x): # type: ignore[no-untyped-def] # pragma: no cover + return x.apply(func, args=args, **kwargs) + + func_udf = udf( + apply_func, + return_type=PandasSeriesType(return_type), + input_types=[PandasSeriesType(input_type)], + strict=bool(na_action == "ignore"), + session=session, + packages=packages, + ) + return func_udf + + +def handle_missing_value_in_variant(value: Any) -> Any: + """ + Returns the correct NULL value in a variant column when a UDF is applied. + + Snowflake supports two types of NULL values, JSON NULL and SQL NULL in variant data. + In Snowflake Python UDF, a VARIANT JSON NULL is translated to Python None and A SQL NULL is + translated to a Python object, which has the `is_sql_null` attribute. 
+ See details in + https://docs.snowflake.com/en/user-guide/semistructured-considerations#null-values + https://docs.snowflake.com/en/developer-guide/udf/python/udf-python-designing#null-values + + In Snowpark pandas apply/applymap API with a variant column, we return JSON NULL if a Python + None is returned in UDF (follow the same as Python UDF), and return SQL null for all other + pandas missing values (np.nan, pd.NA, pd.NaT). Note that pd.NA, pd.NaT are not + json-serializable, so we need to return a json-serializable value anyway (None or SqlNullWrapper()) + """ + + class SqlNullWrapper: + def __init__(self) -> None: + self.is_sql_null = True + + if is_scalar(value) and native_pd.isna(value): + if value is None: + return None + else: + return SqlNullWrapper() + else: + return value + + +def convert_numpy_int_result_to_int(value: Any) -> Any: + """ + If the result is a numpy int, convert it to a python int. + + Use this function to make UDF results JSON-serializable. numpy ints are not + JSON-serializable, but python ints are. Note that this function cannot make + all results JSON-serializable, e.g. it will not convert make + [1, np.int64(3)] or [[np.int64(3)]] serializable by converting the numpy + ints to python ints. However, it's very common for functions to return + numpy integers or dataframes or series thereof, so if we apply this function + to the result (in case the function returns an integer) or each element of + the result (in case the function returns a dataframe or series), we can + make sure that we return a JSON-serializable column to snowflake. + + Args + ---- + value: The value to fix + + Returns + ------- + int(value) if the value is a numpy int, otherwise the value. + """ + return int(value) if np.issubdtype(type(value), np.integer) else value + + +def deduce_return_type_from_function( + func: Union[AggFuncType, UserDefinedFunction] +) -> Optional[DataType]: + """ + Deduce return type if possible from a function, list, dict or type object. List will be mapped to ArrayType(), + dict to MapType(), and if a type object (e.g., str) is given a mapping will be consulted. + Args: + func: callable function, object or Snowpark UserDefinedFunction that can be passed in pandas to reference a function. + + Returns: + Snowpark Datatype or None if no return type could be deduced. + """ + + # Does function have an @udf decorator? Then return type from it directly. + if isinstance(func, UserDefinedFunction): + return func._return_type + + # get the return type of type hints + # PYTHON_TO_SNOW_TYPE_MAPPINGS contains some Python builtin functions that + # can only return the certain type (e.g., `str` will return string) + # if we can't get the type hints from the function, + # use variant as the default, which can hold any type of value + if isinstance(func, list): + return ArrayType() + elif isinstance(func, dict): + return MapType() + elif func in PYTHON_TO_SNOW_TYPE_MAPPINGS: + return PYTHON_TO_SNOW_TYPE_MAPPINGS[func]() + else: + # handle special case 'object' type, in this case use Variant Type. + # Catch potential TypeError exception here from python_type_to_snow_type. + # If it is not the object type, return None to indicate that type hint could not be extracted successfully. 
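+        # For example (assumed behavior of get_types_from_type_hints): a function
+        # annotated as `def f(x) -> str: ...` should resolve to StringType() from
+        # its type hints, while an un-annotated lambda yields no hint and this
+        # helper returns None.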
+ try: + return get_types_from_type_hints(func, TempObjectType.FUNCTION)[0] + except TypeError as te: + if str(te) == "invalid type ": + return VariantType() + return None + + +def sort_apply_udtf_result_columns_by_pandas_positions( + positions: list[int], + pandas_labels: list[Hashable], + snowflake_quoted_identifiers: list[str], +) -> tuple[list[Hashable], list[str]]: + """ + Sort the columns resulting from a UDTF according the position they should take in the resulting pandas dataframe. + + Args + ---- + positions: Positions the columns should take in the resulting pandas dataframe. + pandas_labels: The pandas labels of the columns + snowflake_quoted_identifiers: The snowflake quoted identifiers of the columns. + + Returns: + ------- + tuple where first element has the sorted pandas labels, and second has the sorted quoted identifiers. + """ + # We group the column information together as a tuple (position, pandas + # label, snowflake identifier) to make it easier for sorting as needed. + ColumnInfo = namedtuple( + "ColumnInfo", + ["position", "pandas_label", "snowflake_quoted_identifier"], + ) + + column_info = [ + ColumnInfo(position, pandas_label, snowflake_quoted_identifier) + for position, pandas_label, snowflake_quoted_identifier in zip( + positions, + pandas_labels, + snowflake_quoted_identifiers, + ) + ] + + # Sort based on the column position information. + column_info.sort(key=lambda x: x.position) + + pandas_labels = [info.pandas_label for info in column_info] + snowflake_quoted_identifiers = [ + info.snowflake_quoted_identifier for info in column_info + ] + return pandas_labels, snowflake_quoted_identifiers + + +def get_metadata_from_groupby_apply_pivot_result_column_names( + func_result_snowflake_quoted_identifiers: list[str], +) -> tuple[list[Hashable], list[Hashable], list[str], list[Hashable], list[str]]: + """ + Extract the pandas and snowflake metadata from the column names of the pivot result for groupby.apply. + + Args: + func_result_snowflake_quoted_identifiers: + The identifiers of the columns that represent the function result. + + Returns: + A tuple containing the following, in the order below: + 1. A list containing the names of the column index for the resulting dataframe + 2. A list containing the pandas labels of the data columns in the function result + 3. A list containing the snowflake quoted identifiers of the data columns in the function result. + 4. A list containing the pandas labels of the index columns in the function result + 5. A list containing the snowflake quoted identifiers of the index columns in the function result + + Examples + -------- + # not doing a doctest because it seems to choke on some of the input characters + # due to the escaping. 
+ + input: + + get_metadata_from_groupby_apply_pivot_result_column_names([ + # this representa a data column named ('a', 'group_key') at position 0 + '"\'{""0"": ""a"", ""1"": ""group_key"", ""data_pos"": 0, ""names"": [""c1"", ""c2""]}\'"', + # this represents a data column named ('b', 'int_col') at position 1 + '"\'{""0"": ""b"", ""1"": ""int_col"", ""data_pos"": 1, ""names"": [""c1"", ""c2""]}\'"', + # this repesents a data column named ('b', 'string_col') at position 2 + '"\'{""0"": ""b"", ""1"": ""string_col"", ""data_pos"": 2, ""names"": [""c1"", ""c2""]}\'"', + # this represents an index column for an index level named "i1" + '"\'{""index_pos"": 0, ""name"": ""i1""}\'"', + # this represents an index column for an index level named "i2" + '"\'{""index_pos"": 1, ""name"": ""i2""}\'"' + ]) + + output: + + ( + # these are the column index's names + ['c1', 'c2'], + # these are data column labels + [('a', 'group_key'), ('b', 'int_col'), ('b', 'string_col')], + # these are the snowflake quoted identifiers of the data columns + ['"\'{""0"": ""a"", ""1"": ""group_key"", ""data_pos"": 0, ""names"": [""c1"", ""c2""]}\'"', + '"\'{""0"": ""b"", ""1"": ""int_col"", ""data_pos"": 1, ""names"": [""c1"", ""c2""]}\'"', + '"\'{""0"": ""b"", ""1"": ""string_col"", ""data_pos"": 2, ""names"": [""c1"", ""c2""]}\'"' + ], + # these are the names of the index levels + ['i1', 'i2'], + # these are the snowflake quoted identifiers of the index columns + ['"\'{""index_pos"": 0, ""name"": ""i1""}\'"', '"\'{""index_pos"": 1, ""name"": ""i2""}\'"'] + ) + + """ + index_column_snowflake_quoted_identifiers = [] + data_column_snowflake_quoted_identifiers = [] + data_column_kv_maps = [] + index_column_kv_maps = [] + index_column_pandas_labels = [] + data_column_pandas_labels = [] + column_index_names = None + for identifier in func_result_snowflake_quoted_identifiers: + object_map = parse_snowflake_object_construct_identifier_to_map(identifier) + if "index_pos" in object_map: + index_column_snowflake_quoted_identifiers.append(identifier) + index_column_pandas_labels.append(object_map["name"]) + index_column_kv_maps.append(object_map) + else: + if column_index_names is None: + # if the object map has no 'names', it represents an + # aggregation, i.e. `func` returned a scalar instead of a + # dataframe or series. The result's columns always have a + # single level named `None`. + column_index_names = object_map.get("names", [None]) + ( + data_column_pandas_label, + data_column_kv_map, + ) = parse_object_construct_snowflake_quoted_identifier_and_extract_pandas_label( + identifier, num_levels=len(column_index_names) + ) + data_column_pandas_labels.append(data_column_pandas_label) + data_column_kv_maps.append(data_column_kv_map) + data_column_snowflake_quoted_identifiers.append(identifier) + assert ( + column_index_names is not None + ), "Pivot result should include at least one data column" + + data_column_positions = [kv["data_pos"] for kv in data_column_kv_maps] + index_column_positions = [kv["index_pos"] for kv in index_column_kv_maps] + + # ignore these cases because we have to merge the different column + # indices + # TODO(SNOW-1232208): Handle this case. Note that the pandas behavior for + # this case when func returns a series is contested + # https://github.com/pandas-dev/pandas/issues/54992 + if len(set(data_column_positions)) != len(data_column_positions): + # We can end up here if the column indices differ either in their names + # or in their values. 
For example: + # 1) one group returns a dataframe whose columns are pd.Index(['col_0'], name="group_1_columns"), + # and another group returns a dataframe whose columns are pd.Index(['col_0'], name="group_2_columns"). + # + # In this case, the snowflake labels for each result's 0th column are like + # {0: "col_0", "data_pos": 0, "names": ["group_1_columns"]}, + # {0: "col_0", "data_pos", 0, "names": ["group_2_columns"]} + # + # 2) one group returns a dataframe whose columns are pd.Index(['col_0'], name="columns"), + # and another group returns a dataframe whose columns are pd.Index(['col_1']), name="columns"). + # + # In this case, the snowflake labels for each result's 0th column are like + # {0: "col_0", "data_pos": 0, "names": ["columns"]}, + # {0: "col_1", "data_pos", 0, "names": ["columns"]} + raise NotImplementedError( + "No support for applying a function that returns two dataframes that have different labels for the column at a given position, " + + "a function that returns two dataframes that have different column index names, " + + "or a function that returns two series with different names or conflicting labels for the row at a given position." + ) + if len(set(index_column_positions)) != len(index_column_positions): + raise NotImplementedError( + "No support for applying a function that returns two dataframes that have different names for a given index level" + ) + + ( + data_column_pandas_labels, + data_column_snowflake_quoted_identifiers, + ) = sort_apply_udtf_result_columns_by_pandas_positions( + data_column_positions, + data_column_pandas_labels, + data_column_snowflake_quoted_identifiers, + ) + ( + index_column_pandas_labels, + index_column_snowflake_quoted_identifiers, + ) = sort_apply_udtf_result_columns_by_pandas_positions( + index_column_positions, + index_column_pandas_labels, + index_column_snowflake_quoted_identifiers, + ) + + return ( + column_index_names, + data_column_pandas_labels, + data_column_snowflake_quoted_identifiers, + index_column_pandas_labels, + index_column_snowflake_quoted_identifiers, + ) + + +def groupby_apply_pivot_result_to_final_ordered_dataframe( + ordered_dataframe: OrderedDataFrame, + agg_func: Callable, + by_snowflake_quoted_identifiers_list: list[str], + sort_method: GroupbyApplySortMethod, + as_index: bool, + original_row_position_snowflake_quoted_identifier: str, + group_key_appearance_order_quoted_identifier: str, + row_position_within_group_snowflake_quoted_identifier: str, + data_column_snowflake_quoted_identifiers: list[str], + index_column_snowflake_quoted_identifiers: list[str], + renamed_data_column_snowflake_quoted_identifiers: list[str], + renamed_index_column_snowflake_quoted_identifiers: list[str], + new_index_identifier: str, + func_returned_dataframe: bool, +) -> OrderedDataFrame: + """ + Convert the intermediate groupby.apply result to the final OrderedDataFrame. + + Sort in the correct order and rename index and data columns as needed. Add + an index column if as_index=False. + + Args: + ordered_dataframe: + The intermediate result. + agg_func: + The original function passed to groupby.apply + by_snowflake_quoted_identifiers_list: + identifiers for columns we're grouping by + sort_method: + How to sort the result + as_index: + If true, add group keys as levels in the index. Otherwise, generate a + new index that is equivalent to the new row positions. + original_row_position_snowflake_quoted_identifier: + The label of the original row that each result row originates from. 
+ group_key_appearance_order_quoted_identifier: + The identifier for the column that tells the position of the row + where this group key first occurred in the input dataframe. + row_position_within_group_snowflake_quoted_identifier: + The label of the row position within each group result. + data_column_snowflake_quoted_identifiers: + The identifiers of the data columns of the function results. + index_column_snowflake_quoted_identifiers: + The identifiers of the index columns of the function results. + renamed_data_column_snowflake_quoted_identifiers: + What to rename the data columns to + renamed_index_column_snowflake_quoted_identifiers: + What to rename the index columns to + new_index_identifier: + The identifier for the new index level that we add if as_index=False. + func_returned_dataframe: + Whether `agg_func` returned a pandas DataFrame + Returns: + Ordered dataframe in correct order with all the final snowflake identifiers. + + """ + return_variant, return_type = check_return_variant_and_get_return_type(agg_func) + return ordered_dataframe.sort( + *( + OrderingColumn(x) + for x in ( + *( + by_snowflake_quoted_identifiers_list + if sort_method is GroupbyApplySortMethod.GROUP_KEY_COMPARISON_ORDER + else [ + group_key_appearance_order_quoted_identifier, + ] + if sort_method is GroupbyApplySortMethod.GROUP_KEY_APPEARANCE_ORDER + else [original_row_position_snowflake_quoted_identifier] + ), + row_position_within_group_snowflake_quoted_identifier, + ) + ) + ).select( + *( + # For `func` returning a dataframe: + # if as_index=True: + # the group keys, i.e. the by columns, become the first + # levels of the result index + # If as_index=False: + # We drop the group keys. + # Otherwise: + # We always include the group keys. + by_snowflake_quoted_identifiers_list + if (not func_returned_dataframe or as_index) + else [] + ), + *( + # Whether `func` returns a dataframe or not, when as_index=False, we + # we need to add a new index level that shows where the groups came + # from. + # if sorting by original row order: + # the original row position itself is the new index level. + # Otherwise: + # sort the groups (either in GROUP_KEY_COMPARISON_ORDER or + # in GROUP_KEY_APPEARANCE_ORDER) and assign the + # label i to all rows that came from func(group_i). 
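+ # Editor's note (illustrative, not part of the original change): for example, if the groups appear in the input as 'b' then 'a' and sort_method is GROUP_KEY_COMPARISON_ORDER, dense_rank() - 1 labels rows from func(group 'a') with 0 and rows from func(group 'b') with 1; with GROUP_KEY_APPEARANCE_ORDER the appearance order is used instead, so 'b' gets 0 and 'a' gets 1.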
+ [ + original_row_position_snowflake_quoted_identifier + if sort_method is GroupbyApplySortMethod.ORIGINAL_ROW_ORDER + else ( + dense_rank().over( + Window.order_by( + *( + SnowparkColumn(col).asc_nulls_last() + for col in ( + by_snowflake_quoted_identifiers_list + if sort_method + is GroupbyApplySortMethod.GROUP_KEY_COMPARISON_ORDER + else [group_key_appearance_order_quoted_identifier] + ) + ) + ) + ) + - 1 + ).as_(new_index_identifier) + ] + if not as_index + else [] + ), + *[ + ( + col(old_quoted_identifier).as_(quoted_identifier) + if return_variant + else col(old_quoted_identifier).cast(return_type).as_(quoted_identifier) + ) + for old_quoted_identifier, quoted_identifier in zip( + data_column_snowflake_quoted_identifiers + + index_column_snowflake_quoted_identifiers, + renamed_data_column_snowflake_quoted_identifiers + + renamed_index_column_snowflake_quoted_identifiers, + ) + ], + ) + + +def groupby_apply_create_internal_frame_from_final_ordered_dataframe( + ordered_dataframe: OrderedDataFrame, + func_returned_dataframe: bool, + as_index: bool, + group_keys: bool, + by_pandas_labels: list[Hashable], + by_snowflake_quoted_identifiers: list[str], + func_result_data_column_pandas_labels: list[Hashable], + func_result_data_column_snowflake_quoted_identifiers: list[str], + func_result_index_column_pandas_labels: list[Hashable], + func_result_index_column_snowflake_quoted_identifiers: list[str], + column_index_names: list[str], + new_index_identifier: str, + original_data_column_pandas_labels: list[Hashable], +) -> InternalFrame: + """ + Create the InternalFrame for the groupby.apply result from the final OrderedDataFrame. + + Designate the appropriate snowflake columns as data columns and index + columns. + + Args: + ordered_dataframe: + The final, sorted OrderedDataFrame with the result of groupby.apply + func_returned_dataframe: + Whether the function returned a pandas DataFrame. + as_index: + Whether to include groups in the index. + group_keys: + The group_keys argument to groupby() + by_pandas_labels: + The labels of the grouping columns. + by_snowflake_quoted_identifiers: + The snowflake identifiers of the grouping columns. + func_result_data_column_pandas_labels: + The pandas labels for the columns resulting from calling func() on + each group. Note that these are assumed to be the same across groups. + func_result_data_column_snowflake_quoted_identifiers: + Snowflake identifiers for the columns resulting from calling func() + on each group. Note that these are assumed to be the same across groups. + func_result_index_column_pandas_labels: + The pandas labels for the index levels resulting from calling func() on + each group. Note that these are assumed to be the same across groups. + func_result_index_column_snowflake_quoted_identifiers: + Snowflake identifiers for the index levels resulting from calling func() + on each group. Note that these are assumed to be the same across groups. + column_index_names: + The names of the result's column index. + new_index_identifier: + If as_index=False, use this identifier for a new index level that + indicates which group each chunk of the result came from. + original_data_column_pandas_labels: + The data column pandas labels of the original dataframe. + + Returns: + An InternalFrame representing the final result. + """ + if not as_index and not func_returned_dataframe: + # If func has not returned a dataframe and as_index=False, we put some + # of the by columns in the result instead of in the index. 
+ # note we only include columns from the original frame, and we don't + # include any index levels that we grouped by: + # https://github.com/pandas-dev/pandas/blob/654c6dd5199cb2d6d522dde4c4efa7836f971811/pandas/core/groupby/groupby.py#L1308-L1311 + data_column_pandas_labels = [] + data_column_snowflake_quoted_identifiers = [] + for label, identifier in zip(by_pandas_labels, by_snowflake_quoted_identifiers): + if label in original_data_column_pandas_labels: + data_column_pandas_labels.append(label) + data_column_snowflake_quoted_identifiers.append(identifier) + # If func returned a scalar (i.e. not a dataframe or series), we need to + # call the column with the function result None instead of + # MODIN_UNNAMED_SERIES_LABEL. + if func_result_data_column_pandas_labels == [MODIN_UNNAMED_SERIES_LABEL]: + data_column_pandas_labels.append(None) + else: + data_column_pandas_labels.extend(func_result_data_column_pandas_labels) + data_column_snowflake_quoted_identifiers.extend( + func_result_data_column_snowflake_quoted_identifiers + ) + else: + # Otherwise, the final result's data columns are exactly the columns + # that `func` returned. + data_column_pandas_labels = func_result_data_column_pandas_labels + data_column_snowflake_quoted_identifiers = ( + func_result_data_column_snowflake_quoted_identifiers + ) + + if (not func_returned_dataframe) or group_keys: + # in these cases, we have to prepend index level(s) that indicate which + # group each chunk came from. If as_index=True, these levels are the + # grouping columns themselves. Otherwise, use the new column containing + # the sequential group numbers. + if as_index: + group_pandas_labels = by_pandas_labels + group_quoted_identifiers = by_snowflake_quoted_identifiers + else: + group_pandas_labels = [None] + group_quoted_identifiers = [new_index_identifier] + else: + group_pandas_labels = [] + group_quoted_identifiers = [] + + return InternalFrame.create( + ordered_dataframe=ordered_dataframe, + data_column_pandas_labels=data_column_pandas_labels, + data_column_pandas_index_names=column_index_names, + data_column_snowflake_quoted_identifiers=data_column_snowflake_quoted_identifiers, + index_column_pandas_labels=group_pandas_labels + + func_result_index_column_pandas_labels, + index_column_snowflake_quoted_identifiers=group_quoted_identifiers + + func_result_index_column_snowflake_quoted_identifiers, + ) + + +def groupby_apply_sort_method( + sort: bool, + group_keys: bool, + original_row_position_quoted_identifier: str, + ordered_dataframe_before_sort: OrderedDataFrame, + func_returned_dataframe: bool, +) -> GroupbyApplySortMethod: + """ + Get the sort method that groupby.apply should use on the result rows. + + This function implements the following pandas logic from [1], where + "transform" [2] is a function that returns a result whose index is the + same as the index of the dataframe being grouped. + + if func did not return a dataframe, group_keys=True, or this is not a transform: + if sort: + sort in order of increasing group key values + else: + sort in order of first appearance of group key values + else: + reindex result to the original dataframe's order. 
+ + [1] https://github.com/pandas-dev/pandas/blob/e14a9bd41d8cd8ac52c5c958b735623fe0eae064/pandas/core/groupby/groupby.py#L1196 + [2] https://pandas.pydata.org/docs/user_guide/groupby.html#transformation + + Args: + sort: + The `sort` argument to groupby() + group_keys: + The `group_keys` argument to groupby() + original_row_position_quoted_identifier: + The snowflake identifier of the column in the ordered dataframe + that holds the original row position each result row came from; + a value of -1 marks rows that do not map back to an input row, + which is used to tell whether the function acted like a transform. + ordered_dataframe_before_sort: + Ordered dataframe containing the intermediate, unsorted + groupby.apply result. + func_returned_dataframe: + Whether the user's `func` returned a dataframe. + + Returns: + enum telling how to sort. + + """ + if not func_returned_dataframe or group_keys: + return ( + GroupbyApplySortMethod.GROUP_KEY_COMPARISON_ORDER + if sort + else GroupbyApplySortMethod.GROUP_KEY_APPEARANCE_ORDER + ) + # to distinguish between transforms and non-transforms, we need to + # execute an extra query to compare the index of the result to the + # index of the original dataframe. + # https://github.com/pandas-dev/pandas/issues/57656#issuecomment-1969454704 + # Need to wrap column name in IDENTIFIER, or else bool agg function + # will treat the name as a string literal + is_transform: bool = not ordered_dataframe_before_sort.agg( + builtin("boolor_agg")( + SnowparkColumn(original_row_position_quoted_identifier) == -1 + ).as_("is_transform") + ).collect()[0][0] + return ( + GroupbyApplySortMethod.ORIGINAL_ROW_ORDER + if is_transform + else ( + GroupbyApplySortMethod.GROUP_KEY_COMPARISON_ORDER + if sort + else GroupbyApplySortMethod.GROUP_KEY_APPEARANCE_ORDER + ) + ) diff --git a/src/snowflake/snowpark/modin/plugin/_internal/binary_op_utils.py b/src/snowflake/snowpark/modin/plugin/_internal/binary_op_utils.py new file mode 100644 index 00000000000..23620933c96 --- /dev/null +++ b/src/snowflake/snowpark/modin/plugin/_internal/binary_op_utils.py @@ -0,0 +1,540 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved.
+# +import functools +from collections.abc import Hashable +from dataclasses import dataclass + +from pandas._typing import Callable, Scalar + +from snowflake.snowpark.column import Column as SnowparkColumn +from snowflake.snowpark.functions import col, concat, floor, iff, repeat, when +from snowflake.snowpark.modin.plugin._internal.frame import InternalFrame +from snowflake.snowpark.modin.plugin._internal.join_utils import ( + JoinOrAlignInternalFrameResult, +) +from snowflake.snowpark.modin.plugin._internal.type_utils import infer_object_type +from snowflake.snowpark.modin.plugin._internal.utils import pandas_lit +from snowflake.snowpark.modin.plugin.utils.error_message import ErrorMessage +from snowflake.snowpark.types import ( + DataType, + NullType, + StringType, + _FractionalType, + _IntegralType, +) + +NAN_COLUMN = pandas_lit("nan").cast("float") + +# set of supported binary operations that can be mapped to Snowflake +SUPPORTED_BINARY_OPERATIONS = { + "truediv", + "rtruediv", + "floordiv", + "rfloordiv", + "mod", + "rmod", + "pow", + "rpow", + "__or__", + "__ror__", + "__and__", + "__rand__", + "add", + "radd", + "sub", + "rsub", + "mul", + "rmul", + "eq", + "ne", + "gt", + "lt", + "ge", + "le", +} + + +def compute_modulo_between_snowpark_columns( + first_operand: SnowparkColumn, + first_datatype: DataType, + second_operand: SnowparkColumn, + second_datatype: DataType, +) -> SnowparkColumn: + """ + Compute modulo between two Snowpark columns ``first_operand`` and ``second_operand``. + Supports only numeric values for operands, raises NotImplementedError otherwise. + Module may produce results different from native pandas or Python. + """ + # 0. if f or s is NULL, return NULL (Snowflake's rule) + # 1. s == 0, return nan + # 2. if s != 0, return f % s + # + # Examples + # -------- + # >>> a = pd.Series([7, 7, -7, -7]) + # >>> b = pd.Series([5, -5, 5, -5]) + # >>> a % b + # 0 2.0 + # 1 2.0 + # 2 -2.0 + # 3 -2.0 + # dtype: float64 + + # >>> a = pd.Series([8.9, -0.22, np.nan, -1.02, 3.15, 2.0]) + # >>> b = pd.Series([-2.3, -76.34, 5.3, 5.3, 8.12]) + # >>> a % b + # 0 2.00 + # 1 -0.22 + # 2 NaN + # 3 -1.02 + # 4 3.15 + # 5 NaN + # dtype: float64 + + # Behavior differences + # -------------------- + # Python pandas 1.5 Snowflake + # 7 % 5 = 2 7 % 5 = 2 7 % 5 = 2 + # 7 % -5 = -3 7 % -5 = -3 7 % -5 = 2 + # -7 % 5 = 3 -7 % 5 = 3 -7 % 5 = -2 + # -7 % -5 = -2 -7 % -5 = -2 -7 % -5 = -2 + # + # Snowpark pandas API differs from native pandas results whenever an operand with a negative + # sign is used. + + is_first_operand_numeric_type = ( + isinstance(first_datatype, _IntegralType) + or isinstance(first_datatype, _FractionalType) + or isinstance(first_datatype, NullType) + ) + + is_second_operand_numeric_type = ( + isinstance(second_datatype, _IntegralType) + or isinstance(second_datatype, _FractionalType) + or isinstance(second_datatype, NullType) + ) + + if is_first_operand_numeric_type and is_second_operand_numeric_type: + return ( + when(first_operand.is_null() | second_operand.is_null(), None) + .when(second_operand == 0, NAN_COLUMN) + .otherwise(first_operand % second_operand) + ) + else: + ErrorMessage.not_implemented( + "Modulo does not support non-numeric types, consider using a UDF with apply instead." + ) + + +def compute_power_between_snowpark_columns( + first_operand: SnowparkColumn, + second_operand: SnowparkColumn, +) -> SnowparkColumn: + """ + Compute power between two Snowpark columns ``first_operand`` and ``second_operand``. + """ + # 0. 
if f == 1 or s == 0, return 1 or 1.0 based on f's type (pandas' behavior) + # 1. if f or s is NULL, return NULL (Snowflake's behavior) + # 2. if f is nan, or s is nan, or f < 0 and s can not be cast to int without loss (int(s) != s), return nan + # In Snowflake, if f < 0 and s is not an integer, an invalid floating point operation will be raised. + # E.g., pow(-7, -10.0) is valid, but pow(-7, -10.1) is invalid in snowflake. + # In pandas, pow(-7, -10.1) returns NaN. + # 3. else return f ** s + result = ( + when((first_operand == 1) | (second_operand == 0), 1) + .when(first_operand.is_null() | second_operand.is_null(), None) + .when( + (first_operand == NAN_COLUMN) + | (second_operand == NAN_COLUMN) + | ( + (first_operand < 0) + # it checks whether the value can be cast int without loss + & (second_operand.cast("int") != second_operand) + ), + NAN_COLUMN, + ) + .otherwise(first_operand**second_operand) + ) + return result + + +def is_binary_op_supported(op: str) -> bool: + """ + check whether binary operation is mappable to Snowflake + Args + op: op as string + + Returns: + True if binary operation can be mapped to Snowflake/Snowpark, else False + """ + + return op in SUPPORTED_BINARY_OPERATIONS + + +def compute_binary_op_between_snowpark_columns( + op: str, + first_operand: SnowparkColumn, + first_datatype: Callable[[], DataType], + second_operand: SnowparkColumn, + second_datatype: Callable[[], DataType], +) -> SnowparkColumn: + """ + Compute pandas binary operation for two SnowparkColumns + Args: + op: pandas operation + first_operand: SnowparkColumn for lhs + first_datatype: Callable for Snowpark Datatype for lhs, this is lazy so we can avoid pulling the value if + it is not needed. + second_operand: SnowparkColumn for rhs + second_datatype: Callable for Snowpark DateType for rhs, this is lazy so we can avoid pulling the value if + it is not needed. + + Returns: + SnowparkColumn expr for translated pandas operation + """ + + binary_op_result_column = None + + # some operators and the data types have to be handled specially to align with pandas + # However, it is difficult to fail early if the arithmetic operator is not compatible + # with the data type, so we just let the server raise exception (e.g. a string minus a string). 
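+ # Editor's note (illustrative, not part of the original change): for example, "floordiv" is computed below as FLOOR(lhs / rhs), "add" on two StringType operands becomes CONCAT(lhs, rhs), and an operator with no special handling such as "eq" falls through to the overloaded Snowpark Column operator, i.e. lhs.__eq__(rhs).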
+ if op in ["truediv", "rtruediv", "floordiv", "rfloordiv"]: + # rtruediv means b/a, rfloordiv means b//a in Python + if op in ["rtruediv", "rfloordiv"]: + first_operand, second_operand = ( + second_operand, + first_operand, + ) + + binary_op_result_column = first_operand / second_operand + + if op in ["floordiv", "rfloordiv"]: + binary_op_result_column = floor(binary_op_result_column) + elif op in ["mod", "rmod"]: + if op == "rmod": + first_operand, second_operand = ( + second_operand, + first_operand, + ) + binary_op_result_column = compute_modulo_between_snowpark_columns( + first_operand, first_datatype(), second_operand, second_datatype() + ) + elif op in ["pow", "rpow"]: + if op == "rpow": + first_operand, second_operand = ( + second_operand, + first_operand, + ) + binary_op_result_column = compute_power_between_snowpark_columns( + first_operand, second_operand + ) + elif op in ["__or__", "__ror__"]: + binary_op_result_column = first_operand | second_operand + elif op in ["__and__", "__rand__"]: + binary_op_result_column = first_operand & second_operand + elif op in ["add", "radd", "mul", "rmul"]: + + # string/string case (only for add/radd) + if isinstance(second_datatype(), StringType) and isinstance( + first_datatype(), StringType + ): + if "add" == op: + binary_op_result_column = concat(first_operand, second_operand) + elif "radd" == op: + binary_op_result_column = concat(second_operand, first_operand) + + # string/integer case (only for mul/rmul) + if op in ["mul", "rmul"] and ( + ( + isinstance(second_datatype(), _IntegralType) + and isinstance(first_datatype(), StringType) + ) + or ( + isinstance(second_datatype(), StringType) + and isinstance(first_datatype(), _IntegralType) + ) + ): + # Snowflake's repeat doesn't support negative number + # but pandas will return an empty string + + # swap first_operand with second_operand because REPEAT(, ) expects to be string + if isinstance(first_datatype(), _IntegralType): + first_operand, second_operand = second_operand, first_operand + + binary_op_result_column = iff( + second_operand > pandas_lit(0), + repeat(first_operand, second_operand), + pandas_lit(""), + ) + + # If there is no special binary_op_result_column result, it means the operator and + # the data type of the column don't need special handling. Then we get the overloaded + # operator from Snowpark Column class, e.g., __add__ to perform binary operations. + if binary_op_result_column is None: + binary_op_result_column = getattr(first_operand, f"__{op}__")(second_operand) + + return binary_op_result_column + + +def compute_binary_op_between_snowpark_column_and_scalar( + op: str, + first_operand: SnowparkColumn, + datatype: Callable[[], DataType], + second_operand: Scalar, +) -> SnowparkColumn: + """ + Compute the binary operation between a Snowpark column and a scalar. + Args: + op: the name of binary operation + first_operand: The SnowparkColumn for lhs + datatype: Callable for Snowpark data type, this is lazy so we can avoid pulling the value if + it is not needed. 
+ second_operand: Scalar value + + Returns: + The result as a Snowpark column + """ + + def second_datatype() -> DataType: + return infer_object_type(second_operand) + + second_operand = pandas_lit(second_operand) + + return compute_binary_op_between_snowpark_columns( + op, first_operand, datatype, second_operand, second_datatype + ) + + +def compute_binary_op_between_scalar_and_snowpark_column( + op: str, + first_operand: Scalar, + second_operand: SnowparkColumn, + datatype: Callable[[], DataType], +) -> SnowparkColumn: + """ + Compute the binary operation between a scalar and a Snowpark column. + Args: + op: the name of binary operation + first_operand: Scalar value + second_operand: The SnowparkColumn for rhs + datatype: Callable for Snowpark data type, this is lazy so we can avoid pulling the value if + it is not needed. + + Returns: + The result as a Snowpark column + """ + + def first_datatype() -> DataType: + return infer_object_type(first_operand) + + first_operand = pandas_lit(first_operand) + + return compute_binary_op_between_snowpark_columns( + op, first_operand, first_datatype, second_operand, datatype + ) + + +def compute_binary_op_with_fill_value( + op: str, + lhs: SnowparkColumn, + lhs_datatype: Callable[[], DataType], + rhs: SnowparkColumn, + rhs_datatype: Callable[[], DataType], + fill_value: Scalar, +) -> SnowparkColumn: + """ + Helper method for performing binary operations. + 1. Fills NaN/None values in the lhs and rhs with the given fill_value. + 2. Computes the binary operation expression for lhs rhs. + + fill_value replaces NaN/None values when only either lhs or rhs is NaN/None, not both lhs and rhs. + For instance, with fill_value = 100, + 1. Given lhs = None and rhs = 10, lhs is replaced with fill_value. + result = lhs + rhs => None + 10 => 100 (replaced) + 10 = 110 + 2. Given lhs = 3 and rhs = None, rhs is replaced with fill_value. + result = lhs + rhs => 3 + None => 3 + 100 (replaced) = 103 + 3. Given lhs = None and rhs = None, neither lhs nor rhs is replaced since they both are None. + result = lhs + rhs => None + None => None. + + Args: + op: pandas operation to perform between lhs and rhs + lhs: the lhs SnowparkColumn + lhs_datatype: Callable for Snowpark Datatype for lhs + rhs: the rhs SnowparkColumn + rhs_datatype: Callable for Snowpark Datatype for rhs + fill_value: Fill existing missing (NaN) values, and any new element needed for + successful DataFrame alignment, with this value before computation. + + Returns: + SnowparkColumn expression for translated pandas operation + """ + lhs_cond, rhs_cond = lhs, rhs + if fill_value is not None: + fill_value_lit = pandas_lit(fill_value) + lhs_cond = iff(lhs.is_null() & ~rhs.is_null(), fill_value_lit, lhs) + rhs_cond = iff(rhs.is_null() & ~lhs.is_null(), fill_value_lit, rhs) + + return compute_binary_op_between_snowpark_columns( + op, lhs_cond, lhs_datatype, rhs_cond, rhs_datatype + ) + + +def merge_label_and_identifier_pairs( + sorted_column_labels: list[str], + q_frame_sorted: list[tuple[str, str]], + q_missing_sorted: list[tuple[str, str]], +) -> list[tuple[str, str]]: + """ + Helper function to create a merged list of column label/snowflake quoted identifiers. Assume q_frame_sorted and q_missing_sorted are disjoint wrt to labels. + + Example: + Given sorted_column_labels = [1, 2, 3] + and q_frame_sorted = [(1, "A"), (3, "C")] q_missing_sorted = [(2, "B")] + this function will produce as output [(1, "A"), (2, "B"), (3, "C")]. + Each q_frame_sorted and q_missing_sorted are lists of label/identifier pairs. 
+ I.e., [(1, "A"), (3, "C")] should be understood as 1 -> "A", 3 -> "C". + They're each assumed to be sorted with respect to their labels, and all labels must be contained within + the sorted_column_labels variable. + The result is a combined, sorted representation 1 -> "A", 2 -> "B", 3 -> "C" which resembles the merge-step + of a classical mergesort algorithm. + Args: + sorted_column_labels: The labels to merge over + q_frame_sorted: sorted list of label/identifier pairs. All labels must be contained within sorted_column_labels. + q_missing_sorted: sorted list of label/identifier pairs. All labels must be contained within sorted_column_labels. + + Returns: + List of label/identifier pairs. If the labels were projected out, they would form sorted_column_labels. + """ + if len(q_frame_sorted) > 0 and len(q_missing_sorted) > 0: + # merge labels/identifiers + + i_frame = 0 + i_missing = 0 + + pairs = [] + for label in sorted_column_labels: + # Stop merging once either queue is exhausted. + if i_frame >= len(q_frame_sorted) or i_missing >= len(q_missing_sorted): + break + + if label == q_frame_sorted[i_frame][0]: + pairs.append(q_frame_sorted[i_frame]) + i_frame += 1 + elif label == q_missing_sorted[i_missing][0]: + pairs.append(q_missing_sorted[i_missing]) + i_missing += 1 + # else case not relevant here, because labels of q_frame_sorted and q_missing_sorted must be disjoint. + + if i_frame < len(q_frame_sorted): + pairs += q_frame_sorted[i_frame:] + elif i_missing < len(q_missing_sorted): + pairs += q_missing_sorted[i_missing:] + + return pairs + elif len(q_missing_sorted) == 0: + return q_frame_sorted + else: + return q_missing_sorted + + +@dataclass +class BinaryOperationPair: + # For detailed description of the members, cf. `prepare_binop_pairs_between_dataframe_and_dataframe`. + # This is a helper class to hold the results of this function. + identifier: str + lhs: SnowparkColumn + lhs_datatype: Callable + rhs: SnowparkColumn + rhs_datatype: Callable + + +def prepare_binop_pairs_between_dataframe_and_dataframe( + aligned_rhs_and_lhs: JoinOrAlignInternalFrameResult, + combined_data_labels: list[Hashable], + lhs_frame: InternalFrame, + rhs_frame: InternalFrame, +) -> list[BinaryOperationPair]: + """ + Returns a list of BinaryOperationPair which can be used to carry out a binary operation between two dataframes. + Each BinaryOperationPair consists of the following: + - identifier: an identifier that can be used within align_result to hold the result of a binary operation between two columns + - lhs: a SnowparkColumn expression for the left operand + - lhs_datatype: a function to lazily determine the Snowpark datatype of `lhs` + - rhs: a SnowparkColumn expression for the right operand + - rhs_datatype: a function to lazily determine the Snowpark datatype of `rhs` + + BinaryOperationPairs will be returned in the order of `combined_data_labels`. + + Args: + aligned_rhs_and_lhs: the align result between other_frame and self_frame + combined_data_labels: the combined data labels to be used for align result. + rhs_frame: a frame representing the right side. + lhs_frame: a frame representing the left side. + + Returns: + List of BinaryOperationPair.
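+ + Example (illustrative, editor's note): if combined_data_labels is ['a', 'b'] and 'b' exists only in lhs_frame, the pair for 'b' carries a NULL literal as rhs and an rhs_datatype callable that infers the type of None, while the pair for 'a' references the aligned frame's left and right columns.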
+ """ + # construct list of pairs which label belongs to which quoted identifier + type_map = aligned_rhs_and_lhs.result_frame.quoted_identifier_to_snowflake_type() + left_right_pairs = [] + for label in combined_data_labels: + left_identifier, right_identifier = None, None + + try: + left_idx = lhs_frame.data_column_pandas_labels.index(label) + left_quoted_identifier = lhs_frame.data_column_snowflake_quoted_identifiers[ + left_idx + ] + left_identifier = ( + aligned_rhs_and_lhs.result_column_mapper.left_quoted_identifiers_map[ + left_quoted_identifier + ] + ) + left = col(left_identifier) + # To avoid referencing always the last right_identifier in the loop, use functools.partial + left_typer = functools.partial( + lambda identifier: type_map[identifier], left_identifier + ) # noqa: E731 + except ValueError: + # lhs label not in list. + left = pandas_lit(None) + left_typer = lambda: infer_object_type( # type: ignore[assignment] # noqa: E731 + None + ) + + try: + right_idx = rhs_frame.data_column_pandas_labels.index(label) + right_quoted_identifier = ( + rhs_frame.data_column_snowflake_quoted_identifiers[right_idx] + ) + right_identifier = ( + aligned_rhs_and_lhs.result_column_mapper.right_quoted_identifiers_map[ + right_quoted_identifier + ] + ) + right = col(right_identifier) + # To avoid referencing always the last right_identifier in the loop, use functools.partial + right_typer = functools.partial( + lambda identifier: type_map[identifier], right_identifier + ) # noqa: E731 + except ValueError: + # rhs label not in list + right = pandas_lit(None) + right_typer = lambda: infer_object_type(None) # type: ignore[assignment] # noqa: E731 + + identifier_to_replace = left_identifier or right_identifier + assert identifier_to_replace, "either identifier must be valid" + + # We return a lambda to determine the datatype of each operand lazily as this allows to defer + # invoking a DESCRIBE query as much as possible. + left_right_pairs.append( + BinaryOperationPair( + identifier=identifier_to_replace, + lhs=left, + lhs_datatype=left_typer, + rhs=right, + rhs_datatype=right_typer, + ) + ) + return left_right_pairs diff --git a/src/snowflake/snowpark/modin/plugin/_internal/concat_utils.py b/src/snowflake/snowpark/modin/plugin/_internal/concat_utils.py new file mode 100644 index 00000000000..7da729837bf --- /dev/null +++ b/src/snowflake/snowpark/modin/plugin/_internal/concat_utils.py @@ -0,0 +1,347 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# +from collections.abc import Hashable, Sequence +from typing import Literal, Optional, Union + +import pandas as native_pd + +from snowflake.snowpark._internal.type_utils import ColumnOrName +from snowflake.snowpark.functions import array_construct +from snowflake.snowpark.modin.plugin._internal import join_utils +from snowflake.snowpark.modin.plugin._internal.frame import InternalFrame +from snowflake.snowpark.modin.plugin._internal.ordered_dataframe import OrderingColumn +from snowflake.snowpark.modin.plugin._internal.utils import ( + INDEX_LABEL, + append_columns, + pandas_lit, +) +from snowflake.snowpark.modin.plugin.utils.warning_message import WarningMessage + +CONCAT_POSITION_COLUMN_LABEL = "concat_position" + + +def add_keys_as_column_levels( + columns: native_pd.Index, + frames: list[InternalFrame], + keys: Sequence[Hashable], + names: Union[list[Hashable], None], +) -> native_pd.Index: + """ + Concat all column names from given ``frames``. Also add ``keys`` as outermost + level of column labels. 
+ Args: + columns: Column index of concatenated frame. + frames: A list of internal frames. + keys: A list of hashables to be used as keys. Length of keys must be same as + length of frames. + names: Optional names for levels in column index. + + Returns: + Concatenated column names as native pandas index. + """ + assert len(keys) == len(frames), "Length of keys must be same as length of frames" + + key_values = [] + for key, frame in zip(keys, frames): + key_values.extend([key] * len(frame.data_column_pandas_labels)) + keys_index = native_pd.Index(key_values) + # Add 'keys' as outermost level to column labels. + arrays = [keys_index.get_level_values(i) for i in range(keys_index.nlevels)] + [ + columns.get_level_values(i) for i in range(columns.nlevels) + ] + columns = native_pd.MultiIndex.from_arrays(arrays) + names = names or [] + # Fill with 'None' to match the number of levels in column index + while len(names) < columns.nlevels: + names.append(None) + return columns.set_names(names) + + +def convert_to_single_level_index(frame: InternalFrame, axis: int) -> InternalFrame: + """ + If index on given axis is a MultiIndex, convert it to single level index of tuples. + Do nothing if index on given axis has only one level. + + On axis=1, this is equivalent to the following operation in pandas. + df.columns = df.columns.to_flat_index() + For example a frame with column index + pd.MultiIndex.from_tuples([('a', 'b'), ('c', 'd')], names=['x', 'y']) + will be converted to a frame with column index + pd.Index([('a', 'b'), ('c', 'd')]) + + Similarly on axis=0 this is equivalent to the following operation in pandas + df.index = df.index.to_flat_index() + + NOTE: Original level names are lost during this operation and become None. + + Args: + frame: An InternalFrame. + axis: int: {0, 1} + + Returns: + New InternalFrame with single level index. + + """ + assert axis in (0, 1), f"Invalid axis {axis}, allowed values are 0 and 1" + # Because we break up and store a MultiIndex with several Snowpark columns, we can + # perform the single-level index conversion as a no-op. + if frame.num_index_levels(axis=axis) == 1: + return frame + if axis == 1: + return InternalFrame.create( + ordered_dataframe=frame.ordered_dataframe, + data_column_pandas_labels=frame.data_column_pandas_labels, + data_column_snowflake_quoted_identifiers=frame.data_column_snowflake_quoted_identifiers, + # Setting length of index names to 1 will convert column labels from + # multi-index to single level index. + data_column_pandas_index_names=[None], + index_column_pandas_labels=frame.index_column_pandas_labels, + index_column_snowflake_quoted_identifiers=frame.index_column_snowflake_quoted_identifiers, + ) + else: + WarningMessage.tuples_stored_as_array( + "MultiIndex values are compressed to single index of tuples. Snowflake + " backend doesn't support tuples datatype.
Tuple row labels are stored as" +" ARRAY" + ) + index_identifier = ( + frame.ordered_dataframe.generate_snowflake_quoted_identifiers( + pandas_labels=[INDEX_LABEL], + )[0] + ) + ordered_dataframe = append_columns( + frame.ordered_dataframe, + index_identifier, + array_construct(*frame.index_column_snowflake_quoted_identifiers), + ) + return InternalFrame.create( + ordered_dataframe=ordered_dataframe, + index_column_pandas_labels=[None], + index_column_snowflake_quoted_identifiers=[index_identifier], + data_column_pandas_labels=frame.data_column_pandas_labels, + data_column_snowflake_quoted_identifiers=frame.data_column_snowflake_quoted_identifiers, + data_column_pandas_index_names=frame.data_column_pandas_index_names, + ) + + +def union_all( + frame1: InternalFrame, + frame2: InternalFrame, + join: Literal["inner", "outer"], + sort: Optional[bool] = False, +) -> InternalFrame: + """ + Concatenate frames on index axis by using the UNION ALL operator. + Snowflake identifiers of output frame are based on snowflake identifiers from first + frame. + Args: + frame1: First frame + frame2: Second frame + join: How to handle column index + 'inner': Output frame contains only overlapping columns from both frames. + 'outer': Output frame contains union of columns from both frames. + sort: Sort column axis if True. + + Returns: + New InternalFrame after taking union of given frames. + """ + columns1 = frame1.data_columns_index + columns2 = frame2.data_columns_index + + if join == "inner": + # Preserves the order from calling index. + # For example: + # pd.Index([3, 1, 2]).intersection(pd.Index([1, 2, 3])) will result in + # pd.Index([3, 1, 2]) + data_column_labels = columns1.intersection(columns2, sort=False) + elif join == "outer": + # Preserves the order from calling index. And for labels not in calling index + # preserves the order from argument index. + # For example: + # pd.Index([3, 1, 2]).union(pd.Index([1, 4, 2, 3, 5])) will result in + # pd.Index([3, 1, 2, 4, 5]) + data_column_labels = columns1.union(columns2, sort=False) + else: + raise AssertionError( + f"Invalid join type '{join}'. Accepted values are 'inner' and 'outer'" + ) + if sort: + data_column_labels = data_column_labels.sort_values() + + frame1 = _select_columns(frame1, data_column_labels.tolist()) + frame2 = _select_columns(frame2, data_column_labels.tolist()) + + frame1, frame2 = join_utils.convert_incompatible_types_to_variant( + frame1, + frame2, + frame1.ordered_dataframe.projected_column_snowflake_quoted_identifiers, + frame2.ordered_dataframe.projected_column_snowflake_quoted_identifiers, + ) + + # select data + index + ordering columns for union all + # it is guaranteed that the ordering columns do not overlap with index and data columns + # TODO SNOW-956072: remove the following code after removing convert_incompatible_types_to_variant + frame1_identifiers_for_union_all = ( + frame1.index_column_snowflake_quoted_identifiers + + frame1.data_column_snowflake_quoted_identifiers + + frame1.ordering_column_snowflake_quoted_identifiers + ) + frame2_identifiers_for_union_all = ( + frame2.index_column_snowflake_quoted_identifiers + + frame2.data_column_snowflake_quoted_identifiers + + frame2.ordering_column_snowflake_quoted_identifiers + ) + + # In Snowflake UNION ALL operator, the names of the output columns are based on the + # names of the columns of the first query. So here we copy identifiers from + # first frame.
+ # Reference: https://docs.snowflake.com/en/sql-reference/operators-query + ordered_dataframe = frame1.ordered_dataframe.select( + frame1_identifiers_for_union_all + ).union_all(frame2.ordered_dataframe.select(frame2_identifiers_for_union_all)) + ordered_dataframe = ordered_dataframe.sort(frame1.ordering_columns) + return InternalFrame.create( + ordered_dataframe=ordered_dataframe, + data_column_pandas_labels=frame1.data_column_pandas_labels, + data_column_snowflake_quoted_identifiers=frame1.data_column_snowflake_quoted_identifiers, + data_column_pandas_index_names=frame1.data_column_pandas_index_names, + index_column_pandas_labels=frame1.index_column_pandas_labels, + index_column_snowflake_quoted_identifiers=frame1.index_column_snowflake_quoted_identifiers, + ) + + +def add_key_as_index_columns(frame: InternalFrame, key: Hashable) -> InternalFrame: + """ + Add given 'key' as outermost index columns to given 'frame'. + If 'key' is a tuple, one column is added for each element in the tuple. + + Args: + frame: InternalFrame + key: key to add as index column + + Returns: + An InternalFrame after adding 'key' as index columns. + """ + if not isinstance(key, tuple): + key = tuple([key]) + new_identifiers = frame.ordered_dataframe.generate_snowflake_quoted_identifiers( + pandas_labels=[INDEX_LABEL] * len(key), + ) + col_values = [pandas_lit(value) for value in key] + ordered_dataframe = append_columns( + frame.ordered_dataframe, new_identifiers, col_values + ) + + # Add key as outermost index columns. + index_column_pandas_labels = [None] * len(key) + frame.index_column_pandas_labels + index_column_snowflake_quoted_identifiers = ( + new_identifiers + frame.index_column_snowflake_quoted_identifiers + ) + + return InternalFrame.create( + ordered_dataframe=ordered_dataframe, + data_column_pandas_labels=frame.data_column_pandas_labels, + data_column_snowflake_quoted_identifiers=frame.data_column_snowflake_quoted_identifiers, + data_column_pandas_index_names=frame.data_column_pandas_index_names, + index_column_pandas_labels=index_column_pandas_labels, + index_column_snowflake_quoted_identifiers=index_column_snowflake_quoted_identifiers, + ) + + +def _select_columns( + frame: InternalFrame, data_column_labels: list[Hashable] +) -> InternalFrame: + """ + Select only the given labels from given frame. If any data column label is missing + in the frame, add a new column with NULL values. + + Args: + frame: An InternalFrame + data_column_labels: A list of pandas labels. + + Returns: + New InternalFrame with only the given data columns. + + """ + select_list: list[ColumnOrName] = [] + + # Add index columns + select_list.extend(frame.index_column_snowflake_quoted_identifiers) + + # Add ordering columns + select_list.extend(frame.ordering_column_snowflake_quoted_identifiers) + + snowflake_ids = frame.get_snowflake_quoted_identifiers_group_by_pandas_labels( + data_column_labels, include_index=False + ) + # Add data columns + data_column_snowflake_identifiers = [] + # A map to keep track of the number of times a label is already seen. + # Native pandas fails with IndexError when either frame has duplicate labels, with + # the exception when both frames have the exact same labels in the exact same order. + # In Snowpark pandas, we don't fail concat when duplicate labels are present but + # try to match as many columns as possible from the frames.
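+ # Editor's note (illustrative, not part of the original change): selecting labels ['a', 'a'] from a frame that has a single column 'a' reuses the existing identifier for the first occurrence and appends a NULL-valued column for the second, in line with the lenient behavior described above.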
+ label_count_map: dict[Hashable, int] = {} + for label, id_tuple in zip(data_column_labels, snowflake_ids): + if len(id_tuple) <= label_count_map.get(label, 0): + # if missing add new column to frame with NULL values. + snowflake_id = ( + frame.ordered_dataframe.generate_snowflake_quoted_identifiers( + pandas_labels=[label] + )[0] + ) + select_list.append(pandas_lit(None).as_(snowflake_id)) + else: + index = label_count_map.get(label, 0) + snowflake_id = id_tuple[index] + select_list.append(snowflake_id) + label_count_map[label] = index + 1 + + data_column_snowflake_identifiers.append(snowflake_id) + return InternalFrame.create( + ordered_dataframe=frame.ordered_dataframe.select(select_list), + data_column_pandas_labels=data_column_labels, + data_column_snowflake_quoted_identifiers=data_column_snowflake_identifiers, + data_column_pandas_index_names=frame.data_column_pandas_index_names, + index_column_pandas_labels=frame.index_column_pandas_labels, + index_column_snowflake_quoted_identifiers=frame.index_column_snowflake_quoted_identifiers, + ) + + +def add_global_ordering_columns(frame: InternalFrame, position: int) -> InternalFrame: + """ + To create global ordering for concat (axis=0) operation we first ensure a + row position column for local ordering within the frame. Then add another + column to indicate position of this frame among concat frames given by 'position' + parameter. + Now these two columns can be used to determine global ordering. + Args: + frame: Internal frame. + position: position of this frame among all frames being concatenated. + + Returns: + A new frame with updated ordering columns. + + """ + frame = frame.ensure_row_position_column() + ordered_dataframe = frame.ordered_dataframe.sort( + [OrderingColumn(frame.row_position_snowflake_quoted_identifier)] + ) + identifier = ordered_dataframe.generate_snowflake_quoted_identifiers( + pandas_labels=[CONCAT_POSITION_COLUMN_LABEL], + )[0] + ordered_dataframe = append_columns( + ordered_dataframe, identifier, pandas_lit(position) + ) + ordered_dataframe = ordered_dataframe.sort( + OrderingColumn(identifier), *ordered_dataframe.ordering_columns + ) + return InternalFrame.create( + ordered_dataframe=ordered_dataframe, + data_column_pandas_labels=frame.data_column_pandas_labels, + data_column_snowflake_quoted_identifiers=frame.data_column_snowflake_quoted_identifiers, + data_column_pandas_index_names=frame.data_column_pandas_index_names, + index_column_pandas_labels=frame.index_column_pandas_labels, + index_column_snowflake_quoted_identifiers=frame.index_column_snowflake_quoted_identifiers, + ) diff --git a/src/snowflake/snowpark/modin/plugin/_internal/cumulative_utils.py b/src/snowflake/snowpark/modin/plugin/_internal/cumulative_utils.py new file mode 100644 index 00000000000..ce13b0082f7 --- /dev/null +++ b/src/snowflake/snowpark/modin/plugin/_internal/cumulative_utils.py @@ -0,0 +1,201 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# +# +# This file contains utils functions used by cumulative aggregation functions. 
+# + +import functools +from typing import Any, Callable + +from snowflake.snowpark.column import Column as SnowparkColumn +from snowflake.snowpark.functions import col, iff, sum as sum_sp +from snowflake.snowpark.modin.plugin._internal.aggregation_utils import ( + drop_non_numeric_data_columns, +) +from snowflake.snowpark.modin.plugin._internal.frame import InternalFrame +from snowflake.snowpark.modin.plugin._internal.groupby_utils import ( + check_is_groupby_supported_by_snowflake, + extract_groupby_column_pandas_labels, +) +from snowflake.snowpark.modin.plugin._internal.utils import pandas_lit +from snowflake.snowpark.modin.plugin.compiler import snowflake_query_compiler +from snowflake.snowpark.modin.plugin.utils.error_message import ErrorMessage +from snowflake.snowpark.modin.utils import MODIN_UNNAMED_SERIES_LABEL +from snowflake.snowpark.window import Window + + +def get_cumagg_col_to_expr_map_axis0( + internal_frame: InternalFrame, + cumagg_func: Callable, + skipna: bool, +) -> dict[SnowparkColumn, SnowparkColumn]: + """ + Map each input column to a corresponding expression that computes the cumulative aggregation function on that column when axis = 0. + + Args: + internal_frame: InternalFrame. + The internal frame to apply the cumulative aggregation function on. + cumagg_func: Callable + The cumulative aggregation function to apply on the internal frame. + skipna : bool + Exclude NA/null values. If an entire row/column is NA, the result will be NA. + + Returns: + Dict[SnowparkColumn, SnowparkColumn] + Map between Snowpark column and the corresponding expression that computes the cumulative aggregation function on that column. + """ + window = Window.order_by( + internal_frame._modin_frame.row_position_snowflake_quoted_identifier + ).rows_between(Window.UNBOUNDED_PRECEDING, Window.CURRENT_ROW) + if skipna: + cumagg_col_to_expr_map = { + snowflake_quoted_id: iff( + col(snowflake_quoted_id).is_null(), + pandas_lit(None), + cumagg_func(snowflake_quoted_id).over(window), + ) + for snowflake_quoted_id in internal_frame._modin_frame.data_column_snowflake_quoted_identifiers + } + else: + # When skipna is False and the aggregated values (from prior rows) contain any nulls, then the cumulative aggregate is also null. + # For this reason, we count the number of nulls in the window and compare to zero using the two nested iff's below. + # Note that this could have also been achieved using COUNT_IF(), but as of this writing it has not been supported by Snowpark yet. + cumagg_col_to_expr_map = { + snowflake_quoted_id: iff( + sum_sp( + iff( + col(snowflake_quoted_id).is_null(), pandas_lit(1), pandas_lit(0) + ) + ).over(window) + > pandas_lit(0), + pandas_lit(None), + cumagg_func(snowflake_quoted_id).over(window), + ) + for snowflake_quoted_id in internal_frame._modin_frame.data_column_snowflake_quoted_identifiers + } + return cumagg_col_to_expr_map + + +def get_groupby_cumagg_frame_axis0( + query_compiler: "snowflake_query_compiler.SnowflakeQueryCompiler", + by: Any, + axis: int, + numeric_only: bool, + groupby_kwargs: dict[str, Any], + cumagg_func: Callable, + cumagg_func_name: str, + ascending: bool = True, +) -> InternalFrame: + """ + Return the output internal frame after applying the cumulative aggregation function on the input internal frame when axis = 0. + + Args: + by: mapping, series, callable, label, pd.Grouper, BaseQueryCompiler, list of such. + Used to determine the groups for the groupby.
+ axis : 0 (index), 1 (columns) + numeric_only: bool + Include only float, int, boolean columns. + groupby_kwargs: Dict[str, Any] + keyword arguments passed for the groupby. + cumagg_func: Callable + The cumulative aggregation function to apply on the internal frame. + cumagg_func_name: str + The name of the cumulative aggregation function to apply on the internal frame. + ascending : bool + If False, process the window in reverse order. Needed for cumcount. + + Returns: + InternalFrame + Output internal frame after applying the cumulative aggregation function. + """ + level = groupby_kwargs.get("level", None) + dropna = groupby_kwargs.get("dropna", True) + + if not check_is_groupby_supported_by_snowflake(by, level, axis): + ErrorMessage.not_implemented( + f"GroupBy {cumagg_func_name} with by = {by}, level = {level} and axis = {axis} is not supported yet in Snowpark pandas." + ) + + if level is not None and level != 0: + ErrorMessage.not_implemented( + f"GroupBy {cumagg_func_name} with level = {level} is not supported yet in Snowpark pandas." + ) + + by_list = extract_groupby_column_pandas_labels(query_compiler, by, level) + + qc = query_compiler + if numeric_only: + qc = drop_non_numeric_data_columns(query_compiler, by_list) + + by_snowflake_quoted_identifiers_list = [ + # Duplicate labels in by result in a ValueError. + entry[0] + for entry in qc._modin_frame.get_snowflake_quoted_identifiers_group_by_pandas_labels( + by_list + ) + ] + + window = ( + Window.partition_by(by_snowflake_quoted_identifiers_list) + .order_by( + qc._modin_frame.ordered_dataframe.ordering_column_snowflake_quoted_identifiers + ) + .rows_between( + Window.UNBOUNDED_PRECEDING if ascending else Window.CURRENT_ROW, + Window.CURRENT_ROW if ascending else Window.UNBOUNDED_FOLLOWING, + ) + ) + + dropna_cond = functools.reduce( + lambda combined_col, col: combined_col | col, + map( + lambda by_snowflake_quoted_identifier: col( + by_snowflake_quoted_identifier + ).is_null(), + by_snowflake_quoted_identifiers_list, + ), + ) + + pandas_labels = [] + new_columns = [] + if cumagg_func_name == "cumcount": + new_col = cumagg_func("*").over(window) - pandas_lit(1) + if dropna: + new_col = iff(dropna_cond, pandas_lit(None), new_col) + if qc._modin_frame.num_index_columns > 1: + pandas_labels.append( + (MODIN_UNNAMED_SERIES_LABEL,) * qc._modin_frame.num_index_columns + ) + else: + pandas_labels.append(MODIN_UNNAMED_SERIES_LABEL) + new_columns.append(new_col) + else: + for pandas_label, snowflake_quoted_identifier in zip( + qc._modin_frame.data_column_pandas_labels, + qc._modin_frame.data_column_snowflake_quoted_identifiers, + ): + if snowflake_quoted_identifier not in by_snowflake_quoted_identifiers_list: + new_col = iff( + col(snowflake_quoted_identifier).is_null(), + pandas_lit(None), + cumagg_func(snowflake_quoted_identifier).over(window), + ) + if dropna: + new_col = iff(dropna_cond, pandas_lit(None), new_col) + + pandas_labels.append(pandas_label) + new_columns.append(new_col) + + result_frame = qc._modin_frame.project_columns(pandas_labels, new_columns) + if cumagg_func_name == "cumcount": + return InternalFrame.create( + ordered_dataframe=result_frame.ordered_dataframe, + data_column_pandas_labels=[None], + data_column_snowflake_quoted_identifiers=result_frame.data_column_snowflake_quoted_identifiers, + index_column_pandas_labels=result_frame.index_column_pandas_labels, + index_column_snowflake_quoted_identifiers=result_frame.index_column_snowflake_quoted_identifiers, + data_column_pandas_index_names=[None], + ) + else: + 
return result_frame
diff --git a/src/snowflake/snowpark/modin/plugin/_internal/cut_utils.py b/src/snowflake/snowpark/modin/plugin/_internal/cut_utils.py
new file mode 100644
index 00000000000..771c63eb2ac
--- /dev/null
+++ b/src/snowflake/snowpark/modin/plugin/_internal/cut_utils.py
@@ -0,0 +1,314 @@
+#
+# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved.
+#
+
+from typing import Sequence, Union
+
+import numpy as np
+import pandas
+from pandas import Index, IntervalIndex
+from pandas._typing import Scalar
+from pandas.core.dtypes.common import is_numeric_dtype
+from pandas.core.dtypes.inference import is_scalar
+from pandas.core.reshape.tile import _is_dt_or_td
+
+from snowflake.snowpark.functions import col, iff
+from snowflake.snowpark.modin.plugin._internal.frame import InternalFrame
+from snowflake.snowpark.modin.plugin._internal.ordered_dataframe import (
+    DataFrameReference,
+    OrderedDataFrame,
+    OrderingColumn,
+)
+from snowflake.snowpark.modin.plugin._internal.utils import pandas_lit
+from snowflake.snowpark.modin.plugin.utils.error_message import ErrorMessage
+from snowflake.snowpark.types import LongType
+
+
+# This function stems from pandas 2.2.x and has been minimally modified to not require
+# the full data, but instead work solely with min/max values. It replaces the
+# pandas 2.1.x function pandas.core.reshape.tile._convert_bin_to_numeric_type.
+def _nbins_to_bins(x_min: Scalar, x_max: Scalar, nbins: int, right: bool) -> Index:
+    """
+    If a user passed an integer N for bins, convert this to a sequence of N
+    equal(ish)-sized bins.
+    """
+    if is_scalar(nbins) and nbins < 1:
+        raise ValueError("`bins` should be a positive integer.")  # pragma: no cover
+
+    # this snippet of original pandas code is handled outside of this function
+    # if x_idx.size == 0:
+    #     raise ValueError("Cannot cut empty array")
+
+    # retrieve type of original series used in cut. To speed up processing,
+    # infer from aggregates as the type won't change when computing min/max.
+    x_dtype = pandas.Series([x_min, x_max]).dtype
+    rng = (x_min, x_max)
+    mn, mx = rng
+
+    if is_numeric_dtype(x_dtype) and (np.isinf(mn) or np.isinf(mx)):
+        # GH#24314
+        raise ValueError(  # pragma: no cover
+            "cannot specify integer `bins` when input data contains infinity"  # pragma: no cover
+        )  # pragma: no cover
+
+    if mn == mx:  # adjust end points before binning
+        if _is_dt_or_td(x_dtype):  # pragma: no cover
+            # original pandas code (commented):
+            # # using seconds=1 is pretty arbitrary here
+            # # error: Argument 1 to "dtype_to_unit" has incompatible type
+            # # "dtype[Any] | ExtensionDtype"; expected "DatetimeTZDtype | dtype[Any]"
+            # unit = dtype_to_unit(x_dtype)  # type: ignore[arg-type]
+            # td = Timedelta(seconds=1).as_unit(unit)
+            # # Use DatetimeArray/TimedeltaArray method instead of linspace
+            # # error: Item "ExtensionArray" of "ExtensionArray | ndarray[Any, Any]"
+            # # has no attribute "_generate_range"
+            # bins = x_idx._values._generate_range(  # type: ignore[union-attr]
+            #     start=mn - td, end=mx + td, periods=nbins + 1, freq=None, unit=unit
+            # )
+            ErrorMessage.not_implemented(
+                "no support for datetime types yet."
+ ) # pragma: no cover + else: + mn -= 0.001 * abs(mn) if mn != 0 else 0.001 # pragma: no cover + mx += 0.001 * abs(mx) if mx != 0 else 0.001 # pragma: no cover + + bins = np.linspace(mn, mx, nbins + 1, endpoint=True) # pragma: no cover + else: # adjust end points after binning + if _is_dt_or_td(x_dtype): + # original pandas code (commented): + # # Use DatetimeArray/TimedeltaArray method instead of linspace + # + # # error: Argument 1 to "dtype_to_unit" has incompatible type + # # "dtype[Any] | ExtensionDtype"; expected "DatetimeTZDtype | dtype[Any]" + # unit = dtype_to_unit(x_dtype) # type: ignore[arg-type] + # # error: Item "ExtensionArray" of "ExtensionArray | ndarray[Any, Any]" + # # has no attribute "_generate_range" + # bins = x_idx._values._generate_range( # type: ignore[union-attr] + # start=mn, end=mx, periods=nbins + 1, freq=None, unit=unit + # ) + ErrorMessage.not_implemented( + "no support for datetime types yet." + ) # pragma: no cover + else: + bins = np.linspace(mn, mx, nbins + 1, endpoint=True) + adj = (mx - mn) * 0.001 # 0.1% of the range + if right: + bins[0] -= adj + else: + bins[-1] += adj + + return Index(bins) + + +def preprocess_bins_for_cut( + x_min: Scalar, + x_max: Scalar, + bins: Union[int, Sequence[Scalar], pandas.IntervalIndex], + right: bool, + include_lowest: bool, + precision: int, +) -> Union[int, Sequence[Scalar], pandas.IntervalIndex]: + """ + Adjusts bins to be directly used with compute_bin_indices function below. bins for both qcut and cut are given either as int which will create equidistant bins, + as list of scalars (typically float), or IntervalIndex (not supported). + + Args: + x_min: minimum value of the data which will be binned + x_max: maximum value of the data which will be binned + bins: the bins according to pandas which will define the buckets + right: if True use left-open intervals (a, b], if False use right-open intervals [a, b) + include_lowest: If True and right is True, adjust the first interval by 10 ** (-precision), i.e. the first interval will be (a-10 ** (-precision), b]. This will include the minimum value in the binning process. + precision: only used together with include_lowest to adjust the first bin (cf. include_lowest) + + Returns: + adjusted bins + """ + # Code is mostly from original pandas and adjusted for Snowpark pandas API. + + if not np.iterable(bins): + # Call adjusted function from pandas 2.2.x branch + bins = _nbins_to_bins(x_min, x_max, bins, right) + + elif isinstance(bins, IntervalIndex): + if bins.is_overlapping: # pragma: no cover + raise ValueError( + "Overlapping IntervalIndex is not accepted." + ) # pragma: no cover + + else: + bins = Index(bins) + if not bins.is_monotonic_increasing: + raise ValueError("bins must increase monotonically.") + + # if include_lowest is True, then expand first bucket by 10 ** (-precision) + # I.e., for right=True, intervals will have the form (a, b]. + # If a is now contained in the values, it will fall into (a - 10**(-precision), b]. + # For right=False, this is irrelevant. The expansion only works for right=True. + if include_lowest and right: + bins = Index([bins[0] - 10 ** (-precision)] + list(bins[1:].values)) + + return bins + + +def compute_bin_indices( + values_frame: InternalFrame, + cuts_frame: InternalFrame, + n_cuts: int, + right: bool = True, +) -> InternalFrame: + """ + Given a frame of cuts, i.e. borders of bins (strictly increasing) compute for the data in values_frame the index of the bin they fall into. 
+ E.g., cuts_frame may contain the following data + 0.0, 3.0, 7.8, 10.0 + This would form the following bins (0.0, 3.0], (3.0, 7.8], (7.8, 10.0]. + Consequently, this function will return indices in the range 0...2, e.g. for the following data + + -10.0, 0.0, 1.0, 5.6, 9.0, 10.0, 11.0 + + the following bin indices + + nan, nan, 0., 1., 2., 2., nan + + Note that NULL (nan) is returned for data which lies outside of the cuts provided. + + Args: + values_frame: an InternalFrame representing a Series, the data to be binned. + cuts_frame: an InternalFrame representing a Series with data being a strictly monotonically + increasing sequence of floating numbers forming the border of bins. + n_cuts: The length of cuts_frame. Passed in as separate parameter to avoid an additional query. + right: if True use left-open intervals (a, b], if False use right-open intervals [a, b). + Returns: + InternalFrame representing a Series with the bin indices. indices will be in the range [0, n_cuts - 1]. + """ + + # There will be 0, ..., len(cuts_frame) - 1 buckets, result will be thus in this range. + # We can find for values the cut they belong to by performing a left <= join. As this feature is not supported + # within OrderedDataFrame yet, we use the Snowpark layer directly. This should have no negative + # consequences when it comes to building lazy graphs, as both cut and qcut are materializing operations. + + cuts_frame = cuts_frame.ensure_row_position_column() + value_frame = values_frame.ensure_row_position_column() + + ( + bucket_data_identifier, + bucket_row_position_identifier, + value_data_identifier, + value_row_position_identifier, + ) = value_frame.ordered_dataframe.generate_snowflake_quoted_identifiers( + pandas_labels=["b_data", "b_row_pos", "v_data", "v_row_pos"] + ) + + value_index_identifiers = value_frame.index_column_snowflake_quoted_identifiers + + bucket_snowpark_frame = ( + cuts_frame.ordered_dataframe.to_projected_snowpark_dataframe(True, True, True) + ) + value_snowpark_frame = ( + value_frame.ordered_dataframe.to_projected_snowpark_dataframe(True, True, True) + ) + + # relabel to new identifiers to reference within range join below. + bucket_snowpark_frame = bucket_snowpark_frame.select( + col(cuts_frame.data_column_snowflake_quoted_identifiers[0]).as_( + bucket_data_identifier + ), + col(cuts_frame.row_position_snowflake_quoted_identifier).as_( + bucket_row_position_identifier + ), + ) + + value_snowpark_frame = value_snowpark_frame.select( + *tuple(value_index_identifiers), + col(value_frame.data_column_snowflake_quoted_identifiers[0]).as_( + value_data_identifier + ), + col(value_frame.row_position_snowflake_quoted_identifier).as_( + value_row_position_identifier + ), + ) + + # Perform a left join. The idea is to find all values which fall into an interval + # defined by the cuts/bins in the bucket frame. The closest can be then identified using the + # row position. An alternative to this + # was to use an ASOF join with a proper matching condition. + + if right: + ans = value_snowpark_frame.join( + bucket_snowpark_frame, + value_snowpark_frame[value_data_identifier] + <= bucket_snowpark_frame[bucket_data_identifier], + how="left", + lsuffix="_L", + rsuffix="_R", + ) + + # Result will be v_row_pos and min(b_row_pos) - 1. However, to deal with the edge cases we need to correct + # for the case when the result is in the left-most interval. 
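+        # Worked example (using the values from the docstring above): with cuts
+        # [0.0, 3.0, 7.8, 10.0] and right=True, the value 5.6 joins the bucket rows for
+        # 7.8 and 10.0 (b_row_pos 2 and 3), so min(b_row_pos) - 1 = 1, i.e. the
+        # (3.0, 7.8] bucket.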
+ ans = ans.group_by( + value_index_identifiers + + [value_data_identifier, value_row_position_identifier] + ).min(bucket_row_position_identifier) + else: + # For right=False, perform a >= join and use max(b_row_pos) - 1. + ans = value_snowpark_frame.join( + bucket_snowpark_frame, + value_snowpark_frame[value_data_identifier] + >= bucket_snowpark_frame[bucket_data_identifier], + how="left", + lsuffix="_L", + rsuffix="_R", + ) + + # Result will be v_row_pos and max(q_row_pos) - 1. However, to deal with the edge cases we need to correct + # for the case when the result is in the left-most interval. + ans = ans.group_by( + value_index_identifiers + + [value_data_identifier, value_row_position_identifier] + ).max(bucket_row_position_identifier) + + column_names = ans.columns + bin_index_col = col(column_names[-1]) + + if right: + # An index value of 0 means the data is outside of the first bucket. Set to NULL. All others, perform -1. + # For data outside of the last bucket, the left join will automatically fill it with NULL. + correct_index_expr = iff( + bin_index_col != pandas_lit(0), + bin_index_col - pandas_lit(1), + pandas_lit(None), + ).astype(LongType()) + else: + # For right=False, correct for the bin indices exceeding the max value n_cuts - 1. If the index is larger + # than this number, then set to NULL. + correct_index_expr = iff( + bin_index_col >= pandas_lit(n_cuts - 1), pandas_lit(None), bin_index_col + ).astype(LongType()) + + ans = ans.select( + *tuple(value_index_identifiers), + col(value_row_position_identifier), + correct_index_expr, + ) + column_names = ans.columns + new_data_identifier = column_names[-1] + + # Create OrderedDataFrame and InternalFrame and QC out of this. + # Need to restore index as well which has been passed through. + new_ordered_dataframe = OrderedDataFrame( + DataFrameReference(ans), + projected_column_snowflake_quoted_identifiers=value_index_identifiers + + [new_data_identifier], + ordering_columns=[OrderingColumn(value_row_position_identifier)], + row_position_snowflake_quoted_identifier=value_row_position_identifier, + ) + + new_frame = InternalFrame.create( + ordered_dataframe=new_ordered_dataframe, + data_column_pandas_labels=value_frame.data_column_pandas_labels, + data_column_pandas_index_names=value_frame.data_column_index_names, + data_column_snowflake_quoted_identifiers=[new_data_identifier], + index_column_pandas_labels=value_frame.index_column_pandas_labels, + index_column_snowflake_quoted_identifiers=value_index_identifiers, + ) + + return new_frame diff --git a/src/snowflake/snowpark/modin/plugin/_internal/frame.py b/src/snowflake/snowpark/modin/plugin/_internal/frame.py new file mode 100644 index 00000000000..598839e01ce --- /dev/null +++ b/src/snowflake/snowpark/modin/plugin/_internal/frame.py @@ -0,0 +1,1196 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. 
+#
+import functools
+from collections.abc import Hashable
+from dataclasses import dataclass
+from logging import getLogger
+from typing import Any, Callable, NamedTuple, Optional, Union
+
+import pandas as pd
+from pandas._typing import IndexLabel
+from pandas.core.dtypes.common import is_object_dtype
+
+from snowflake.snowpark._internal.analyzer.analyzer_utils import (
+    quote_name_without_upper_casing,
+)
+from snowflake.snowpark.column import Column as SnowparkColumn
+from snowflake.snowpark.functions import col, last_value
+from snowflake.snowpark.modin.plugin._internal.ordered_dataframe import (
+    OrderedDataFrame,
+    OrderingColumn,
+)
+from snowflake.snowpark.modin.plugin._internal.type_utils import TypeMapper
+from snowflake.snowpark.modin.plugin._internal.utils import (
+    DEFAULT_DATA_COLUMN_LABEL,
+    INDEX_LABEL,
+    ROW_POSITION_COLUMN_LABEL,
+    append_columns,
+    assert_duplicate_free,
+    count_rows,
+    extract_pandas_label_from_snowflake_quoted_identifier,
+    fill_missing_levels_for_pandas_label,
+    from_pandas_label,
+    get_distinct_rows,
+    is_valid_snowflake_quoted_identifier,
+    snowpark_to_pandas_helper,
+    to_pandas_label,
+)
+from snowflake.snowpark.modin.plugin._typing import (
+    LabelIdentifierPair,
+    LabelTuple,
+    PandasLabelToSnowflakeIdentifierPair,
+)
+from snowflake.snowpark.modin.utils import MODIN_UNNAMED_SERIES_LABEL
+from snowflake.snowpark.types import DataType
+from snowflake.snowpark.window import Window
+
+logger = getLogger(__name__)
+
+LEFT_PREFIX = "left"
+RIGHT_PREFIX = "right"
+
+
+class UpdatedInternalFrameResult(NamedTuple):
+    """Contains the updated internal frame and mapping from old ids to new ids."""
+
+    frame: "InternalFrame"
+    old_id_to_new_id_mappings: dict[str, str]
+
+
+@dataclass(frozen=True)
+class InternalFrame:
+    """
+    Internal abstraction of the storage format holding all information necessary to
+    represent a pandas.DataFrame within Snowflake.
+    """
+
+    # OrderedDataFrame representation of the state of the data held by this internal frame.
+    # Ordering columns and row position column are maintained by OrderedDataFrame.
+    ordered_dataframe: OrderedDataFrame
+    # Map between label and snowflake quoted identifier.
+    # This map is maintained as an ordered list, which must be in the order of
+    # pandas index columns + pandas data columns.
+    # For MultiIndex as df.columns, the pandas label will be a tuple for each column.
+    # An example of MultiIndex as df.columns:
+    # pd.MultiIndex.from_tuples([('baz', 'A'), ('baz', 'B'), ('zoo', 'A'), ('zoo', 'B')])
+    # the pandas labels of data columns will be [('baz', 'A'), ('baz', 'B'), ('zoo', 'A'), ('zoo', 'B')]
+    label_to_snowflake_quoted_identifier: tuple[LabelIdentifierPair, ...]
+    # Number of index columns for the pandas dataframe, where the first num_index_columns elements
+    # of label_to_snowflake_quoted_identifier are for the pandas index columns.
+    num_index_columns: int
+    # Stores pandas labels for the column index name or MultiIndex names, e.g., these labels are
+    # used to generate df.columns.names.
+    # The length of data_column_index_names equals the number of MultiIndex levels.
+    # For a 3-level MultiIndex, the value can be like ['A', 'B', 'C']
+    data_column_index_names: tuple[LabelTuple, ...]
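+    # Illustrative layout (hypothetical identifiers): a frame with a single index column
+    # labelled "idx" and data columns "A" and "B" would, conceptually, store the pairs
+    # ("idx", '"idx"'), ("A", '"A"'), ("B", '"B"') in label_to_snowflake_quoted_identifier,
+    # with num_index_columns = 1 and a single (unnamed) column index level.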
+ + @classmethod + def create( + cls, + *, + ordered_dataframe: OrderedDataFrame, + data_column_pandas_labels: list[Hashable], + data_column_pandas_index_names: list[Hashable], + data_column_snowflake_quoted_identifiers: list[str], + index_column_pandas_labels: list[Hashable], + index_column_snowflake_quoted_identifiers: list[str], + ) -> "InternalFrame": + """ + Args: + ordered_dataframe: underlying ordered dataframe used + data_column_pandas_labels: A list of pandas hashable labels for pandas data columns. + data_column_pandas_index_names: A list of hashable labels for pandas column index names + data_column_snowflake_quoted_identifiers: A list of snowflake quoted identifiers for pandas data columns, + represented by str. These identifiers are used to refer columns in underlying snowpark dataframe to + access data in snowflake. + index_column_pandas_labels: A list of pandas index column labels. + index_column_snowflake_quoted_identifiers: A list of snowflake quoted identifiers for pandas index columns. + """ + + assert len(data_column_snowflake_quoted_identifiers) == len( + data_column_pandas_labels + ), f"data column label identifier length mismatch, labels {data_column_pandas_labels}, identifiers {data_column_snowflake_quoted_identifiers}" + assert len(index_column_snowflake_quoted_identifiers) == len( + index_column_pandas_labels + ), f"index column label identifier length mismatch, labels {index_column_pandas_labels}, identifiers {index_column_snowflake_quoted_identifiers}" + + # List of pandas_label_to_snowflake_quoted_identifier mapping for index columns + index_columns_mapping: list[LabelIdentifierPair] = [ + LabelIdentifierPair( + # index column labels is always flat with only one level + from_pandas_label(pandas_label, num_levels=1), + snowflake_quoted_identifier, + ) + for pandas_label, snowflake_quoted_identifier in zip( + index_column_pandas_labels, + index_column_snowflake_quoted_identifiers, + ) + ] + + # List of pandas_label_to_snowflake_quoted_identifier mapping for data columns + data_columns_mapping: list[LabelIdentifierPair] = [ + LabelIdentifierPair( + from_pandas_label( + pandas_label, + num_levels=len(data_column_pandas_index_names), + ), + snowflake_quoted_identifier, + ) + for pandas_label, snowflake_quoted_identifier in zip( + data_column_pandas_labels, + data_column_snowflake_quoted_identifiers, + ) + ] + + return cls( + ordered_dataframe=ordered_dataframe, + label_to_snowflake_quoted_identifier=tuple( + index_columns_mapping + data_columns_mapping + ), + num_index_columns=len(index_column_snowflake_quoted_identifiers), + data_column_index_names=tuple( + # data_column_index_names is always flat with only one level + from_pandas_label(name, num_levels=1) + for name in data_column_pandas_index_names + ), + ) + + def __post_init__(self) -> None: + # perform checks for dataclass here + + # check there must be at least one index column associated with the dataframe + assert ( + self.num_index_columns >= 1 + ), "At least 1 index column should be presented for the dataframe" + + # the ordering_columns_tuple cannot be empty, because we guarantee the determinism + # for the data order of the dataframe, + assert len(self.ordering_columns) > 0, "ordering_columns cannot be empty" + + # validate data columns + self._validate_data_column_pandas_index_names() + + # make sure that all names required in metadata are present within snowpark_dataframe + # so that the internal frame represents a valid state. 
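+        # Concretely, every index and data column identifier recorded in
+        # label_to_snowflake_quoted_identifier must be properly quoted and must appear
+        # among the projected columns of the underlying ordered dataframe.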
+ snowflake_quoted_identifiers = ( + self.ordered_dataframe.projected_column_snowflake_quoted_identifiers + ) + + def validate_snowflake_quoted_identifier( + quoted_identifier: str, + column_category: str, + hashable_label: Hashable = None, + ) -> None: + """ + validation for the snowflake quoted identifier, which performs two checks: + 1) the identifier is quoted 2) the identifier exists in the underlying snowpark dataframe + + Returns: + None. Assertion is raised if any check fails. + """ + # generate a properly quoted escaped_name for the error message below. + escaped_name = quoted_identifier.replace("'", "\\'") + assert is_valid_snowflake_quoted_identifier( + quoted_identifier + ), f"Found not-quoted identifier for '{column_category}':'{escaped_name}'" + + assert quoted_identifier in snowflake_quoted_identifiers, ( + f"{column_category}={escaped_name} not found in snowpark dataframe " + f"schema {snowflake_quoted_identifiers}, pandas_label={hashable_label}" + ) + + # validate the snowflake quoted identifier data + index columns + for ( + label, + snowflake_quoted_identifier, + ) in self.label_to_snowflake_quoted_identifier: + validate_snowflake_quoted_identifier( + snowflake_quoted_identifier, + "dataframe column", + to_pandas_label(label), + ) + + # check that snowflake quoted identifier is duplicate free + assert_duplicate_free( + self.index_column_snowflake_quoted_identifiers + + self.data_column_snowflake_quoted_identifiers, + "dataframe columns", + ) + + def _validate_data_column_pandas_index_names(self) -> None: + # the index on column (df.columns) must have a name (can be None) + assert ( + len(self.data_column_pandas_index_names) >= 1 + ), "data_column_pandas_index_names cannot be empty" + + # validate all labels are tuples with the same length + num_levels = len(self.data_column_pandas_index_names) + for label, _ in self.label_to_snowflake_quoted_identifier[ + self.num_index_columns : + ]: + assert num_levels == len( + label + ), f"All tuples in data_column_pandas_labels must have the same length {num_levels}, but got {label}" + + @property + def index_column_snowflake_quoted_identifiers(self) -> list[str]: + """ + Get snowflake quoted identifier for all index columns + Returns: + List of snowflake quoted identifiers for index columns + """ + return [ + col.snowflake_quoted_identifier + for col in self.label_to_snowflake_quoted_identifier[ + : self.num_index_columns + ] + ] + + @property + def data_column_snowflake_quoted_identifiers(self) -> list[str]: + """ + Get snowflake quoted identifier for all data columns + Returns: + List of snowflake quoted identifiers for data columns + """ + return [ + col.snowflake_quoted_identifier + for col in self.label_to_snowflake_quoted_identifier[ + self.num_index_columns : + ] + ] + + def quoted_identifier_to_snowflake_type(self) -> dict[str, DataType]: + return { + f.column_identifier.quoted_name: f.datatype + for f in self.ordered_dataframe.schema.fields + } + + @property + def index_column_pandas_labels(self) -> list[Hashable]: + """ + Get pandas labels for all index columns + Returns: + List of pandas labels for index columns + """ + return [ + to_pandas_label(col.label) + for col in self.label_to_snowflake_quoted_identifier[ + : self.num_index_columns + ] + ] + + @property + def data_column_pandas_labels(self) -> list[Hashable]: + """ + Get pandas labels for all data columns + Returns: + List of pandas labels for data columns + """ + return [ + to_pandas_label(col.label) + for col in self.label_to_snowflake_quoted_identifier[ + 
self.num_index_columns : + ] + ] + + @property + def ordering_column_snowflake_quoted_identifiers(self) -> list[str]: + """ + Get snowflake quoted identifier for ordering columns + Return: + List of snowflake quoted identifier for the ordering columns + """ + + return self.ordered_dataframe.ordering_column_snowflake_quoted_identifiers + + @property + def ordering_columns(self) -> list[OrderingColumn]: + """ + Get list of ordering columns. + Returns: + List of OrderingColumn. + """ + return self.ordered_dataframe.ordering_columns + + @property + def row_position_snowflake_quoted_identifier(self) -> Optional[str]: + return self.ordered_dataframe.row_position_snowflake_quoted_identifier + + @property + def row_count_snowflake_quoted_identifier(self) -> Optional[str]: + return self.ordered_dataframe.row_count_snowflake_quoted_identifier + + @property + def data_column_pandas_index_names(self) -> list[Hashable]: + """Returns pandas labels from column index (df.columns.names).""" + return [to_pandas_label(name) for name in self.data_column_index_names] + + def num_index_levels(self, *, axis: int = 0) -> int: + """ + Returns number of index levels for given `axis`. + + Args: + axis: If axis=0, return number of levels in row labels. + If axis=1, return number of levels in columns labels. + + Returns: + number of index levels for given `axis` + + Raises: + ValueError if `axis` is not valid. + """ + if axis == 0: + return self.num_index_columns + elif axis == 1: + return len(self.data_column_pandas_index_names) + else: + raise ValueError("'axis' can only be 0 or 1") + + def is_multiindex(self, *, axis: int = 0) -> bool: + """ + Returns whether the InternalFrame has a MultiIndex along `axis`. + Args: + axis: If axis=0, return whether the InternalFrame has a MultiIndex as df.index. + If axis=1, return whether the InternalFrame has a MultiIndex as df.columns. + """ + return self.num_index_levels(axis=axis) > 1 + + def is_unnamed_series(self) -> bool: + """ + Check if the InternalFrame is a representation for an unnamed series. An InternalFrame represents an + unnamed series if there is only one data column and the data column has label name MODIN_UNNAMED_SERIES_LABEL. + """ + return ( + len(self.data_column_pandas_labels) == 1 + and self.data_column_pandas_labels[0] == MODIN_UNNAMED_SERIES_LABEL + ) + + @property + def data_columns_index(self) -> pd.Index: + """ + Returns pandas Index object for column index (df.columns). + We can't do the same thing for df.index here because it requires pulling + the data from snowflake and filing a query to snowflake. + """ + if self.is_multiindex(axis=1): + return pd.MultiIndex.from_tuples( + self.data_column_pandas_labels, + names=self.data_column_pandas_index_names, + ) + else: + return pd.Index( + self.data_column_pandas_labels, + name=self.data_column_pandas_index_names[0], + # setting tupleize_cols=False to avoid creating a MultiIndex + # otherwise, when labels are tuples (e.g., [("A", "a"), ("B", "b")]), + # a MultiIndex will be created incorrectly + tupleize_cols=False, + ) + + @property + def index_columns_index(self) -> pd.Index: + """ + Get pandas index. The method eagerly pulls the values from Snowflake because index requires the values to be + filled + + Returns: + The index (row labels) of the DataFrame. 
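+        Note:
+            For a MultiIndex (more than one index column) a pandas.MultiIndex is built from
+            the fetched rows; otherwise a flat pandas.Index is returned and, when pandas
+            cannot infer a more specific dtype, cast to the type mapped from the Snowflake
+            column type.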
+ """ + + index_values = snowpark_to_pandas_helper( + self.ordered_dataframe.select( + self.index_column_snowflake_quoted_identifiers + ) + ).values + if self.is_multiindex(axis=0): + value_tuples = [tuple(row) for row in index_values] + return pd.MultiIndex.from_tuples( + value_tuples, names=self.index_column_pandas_labels + ) + else: + # We have one index column. Fill in the type correctly. + index_identifier = self.index_column_snowflake_quoted_identifiers[0] + index_type = TypeMapper.to_pandas( + self.quoted_identifier_to_snowflake_type()[index_identifier] + ) + ret = pd.Index( + [row[0] for row in index_values], + name=self.index_column_pandas_labels[0], + # setting tupleize_cols=False to avoid creating a MultiIndex + # otherwise, when labels are tuples (e.g., [("A", "a"), ("B", "b")]), + # a MultiIndex will be created incorrectly + tupleize_cols=False, + ) + # When pd.Index() failed to reduce dtype to a numpy or pandas extension type, it will be object type. For + # example, an empty dataframe will be object dtype by default, or a variant, or a timestamp column with + # multiple timezones. So here we cast the index to the index_type when ret = pd.Index(...) above cannot + # figure out a non-object dtype. Note that the index_type is a logical type may not be 100% accurate. + if is_object_dtype(ret.dtype) and not is_object_dtype(index_type): + ret = ret.astype(index_type) + return ret + + def get_snowflake_quoted_identifiers_group_by_pandas_labels( + self, + pandas_labels: list[Hashable], + include_index: bool = True, + ) -> list[tuple[str, ...]]: + """ + Map given pandas labels to names in underlying snowpark dataframe. Given labels can be data or index labels. + Single label can map to multiple snowpark names from underlying dataframe. Which is represented by tuples. + We return the result in the same order as input pandas_labels. + + Args: + pandas_labels: A list of pandas labels. + include_index: Include the index columns in addition to data columns, default is True. + + Returns: + A list of tuples for matched identifiers. Each element of list is a tuple of str containing matched + snowflake quoted identifiers for corresponding pandas label in 'pandas_labels'. + Length and order of this list is same as length of given 'pandas_labels'. + """ + + snowflake_quoted_identifiers = [] + for label in pandas_labels: + matched_columns = list( + filter( + lambda col: to_pandas_label(col.label) == label, + self.label_to_snowflake_quoted_identifier[ + (0 if include_index else self.num_index_columns) : + ], + ) + ) + snowflake_quoted_identifiers.append( + tuple(col.snowflake_quoted_identifier for col in matched_columns) + ) + + return snowflake_quoted_identifiers + + def parse_levels_to_integer_levels( + self, levels: IndexLabel, allow_duplicates: bool, axis: int = 0 + ) -> list[int]: + """ + Returns a list of integers representing levels in Index object on given axis. + + Args: + levels: IndexLabel, can be int, level name, or sequence of such. + allow_duplicates: whether allow duplicated levels in the result. When False, the result will not + contain any duplicated levels. Otherwise, the result will contain duplicated level number if + different level value is mapped to the same level number. + axis: DataFrame axis, given levels belong to. Defaults to 0. Allowed values + are 0 or 1. 
+        Returns:
+            List[int]
+                A list of integers corresponding to the index levels for the given levels, in the same
+                order as the given levels.
+        """
+        num_level = self.num_index_levels(axis=axis)
+        if levels is not None:
+            if not isinstance(levels, (tuple, list)):
+                levels = [levels]
+            result = []
+            for key in levels:
+                if isinstance(key, int):
+                    error_message = f"Too many levels: Index has only {num_level} level{'s' if num_level > 1 else ''}"
+                    # when key < 0, raise IndexError if key < -num_level as native pandas does
+                    # set key to a positive number as native pandas does
+                    if key < 0:
+                        key = key + num_level
+                        if key < 0:
+                            raise IndexError(
+                                f"{error_message}, {key - num_level} is not a valid level number"
+                            )
+                    # when key > num_level - 1, raise IndexError as native pandas does
+                    elif key > num_level - 1:  # level starts from 0
+                        raise IndexError(f"{error_message}, not {key + 1}")
+                elif isinstance(key, str):  # get level number from label
+                    try:
+                        if axis == 0:
+                            key = self.index_column_pandas_labels.index(key)
+                        else:
+                            key = self.data_column_pandas_index_names.index(key)
+                    # if key doesn't exist, a ValueError will be raised
+                    except ValueError:
+                        if num_level > 1:
+                            raise KeyError(f"Level {key} not found")
+                        else:
+                            raise KeyError(
+                                f"Requested level ({key}) does not match index name ({self.index_column_pandas_labels[0]})"
+                            )
+                # do not add key in the result if the key is already in the result and duplication is not allowed
+                if (key not in result) or allow_duplicates:
+                    result.append(key)
+        else:
+            result = list(range(num_level))
+        return result
+
+    def get_pandas_labels_for_levels(self, levels: list[int]) -> list[Hashable]:
+        """
+        Get the list of corresponding pandas labels for a list of given integer
+        Index levels.
+        Note: duplication in levels is allowed.
+        """
+        return [self.index_column_pandas_labels[level] for level in levels]
+
+    def get_snowflake_identifiers_for_levels(self, levels: list[int]) -> list[str]:
+        """
+        Get the list of corresponding Snowflake identifiers for a list of given integer index levels.
+
+        Note: duplication in levels is allowed.
+        """
+        return [
+            self.index_column_snowflake_quoted_identifiers[level] for level in levels
+        ]
+
+    def get_snowflake_identifiers_and_pandas_labels_from_levels(
+        self, levels: list[int]
+    ) -> tuple[list[Hashable], list[str], list[Hashable], list[str]]:
+        """
+        Selects snowflake identifiers and pandas labels from index columns in `levels`.
+        Also returns snowflake identifiers and pandas labels not in `levels`.
+
+        Args:
+            levels: A list of integers representing levels in the pandas Index.
+
+        Returns:
+            A tuple containing 4 lists:
+                1. The first list contains pandas labels of index columns in `levels`.
+                2. The second list contains snowflake identifiers of index columns in `levels`.
+                3. The third list contains pandas labels of index columns not in `levels`.
+                4. The fourth list contains snowflake identifiers of index columns not in `levels`.
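+
+            Example (illustrative): with index columns labelled ["a", "b", "c"] mapped to
+            identifiers ['"A"', '"B"', '"C"'] and levels=[0, 2], the result would be
+            (["a", "c"], ['"A"', '"C"'], ["b"], ['"B"']).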
+ """ + index_column_pandas_labels_in_levels = [] + index_column_snowflake_quoted_identifiers_in_levels = [] + index_column_pandas_labels_not_in_levels = [] + index_column_snowflake_quoted_identifiers_not_in_levels = [] + for idx, (identifier, label) in enumerate( + zip( + self.index_column_snowflake_quoted_identifiers, + self.index_column_pandas_labels, + ) + ): + if idx in levels: + index_column_pandas_labels_in_levels.append(label) + index_column_snowflake_quoted_identifiers_in_levels.append(identifier) + else: + index_column_pandas_labels_not_in_levels.append(label) + index_column_snowflake_quoted_identifiers_not_in_levels.append( + identifier + ) + + return ( + index_column_pandas_labels_in_levels, + index_column_snowflake_quoted_identifiers_in_levels, + index_column_pandas_labels_not_in_levels, + index_column_snowflake_quoted_identifiers_not_in_levels, + ) + + @functools.cached_property + def num_rows(self) -> int: + """ + Returns: + Number of rows in this frame. + """ + return count_rows(self.ordered_dataframe) + + def has_unique_index(self, axis: Optional[int] = 0) -> bool: + """ + Returns true if index has unique values on specified axis. + Args: + axis: {0, 1} defaults to 0 + + Returns: + True if index has unique values on specified axis, otherwise returns False. + + """ + if axis == 1: + return self.data_columns_index.is_unique + else: + # Note: We can't use 'count_distinct' because it ignores null values. + total_rows = self.num_rows + distinct_rows = count_rows( + get_distinct_rows( + self.ordered_dataframe.select( + self.index_column_snowflake_quoted_identifiers + ) + ) + ) + return total_rows == distinct_rows + + def validate_no_duplicated_data_columns_mapped_for_labels( + self, + pandas_labels: list[Hashable], + user_frame_identifier: Optional[str] = None, + ) -> None: + """ + For a given set of pandas labels, verify that there are no multiple data columns in the frame + mapped to the same label in the `pandas_labels`. + + Args: + pandas_labels: set of pandas labels to check for duplicated column mappings + user_frame_identifier: the identifier for the frame that is used in the error message to help user to + identify which input frame has error. For example, it can be 'condition' or 'other' frame for + where API. + Raises: + ValueError: if for a pandas label, there exists more than one data columns in the given frame mapped to the label. + """ + label_identifiers_list = ( + self.get_snowflake_quoted_identifiers_group_by_pandas_labels( + pandas_labels=pandas_labels, include_index=False + ) + ) + labels_with_duplication = [ + pandas_labels[i] + for (i, label_identifiers_tuple) in enumerate(label_identifiers_list) + if len(label_identifiers_tuple) > 1 + ] + if len(labels_with_duplication) > 0: + # The error message raised under duplication cases is different from native pandas. + # Native pandas raises ValueError with message "cannot reindex on an axis with duplicate labels" + # for duplication occurs in the condition frame, and raises InvalidIndexError with no message for + # duplication occurs in other frame. + # Snowpark pandas gives a clear message to the customer about what is the problem with the dataframe. + message = f"Multiple columns are mapped to each label in {labels_with_duplication} in DataFrame" + if user_frame_identifier is not None: + message += f" {user_frame_identifier}" + raise ValueError(message) + + ########################################################################### + # START: Internal Frame mutation APIs. 
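+    # InternalFrame is a frozen dataclass, so none of the APIs below mutate the frame in
+    # place; each one builds and returns a new InternalFrame (and, where needed, a new
+    # projection of the underlying ordered dataframe).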
+    # APIs that create a new InternalFrame instance should only be added below
+    def ensure_row_position_column(self) -> "InternalFrame":
+        """
+        Ensure the row position column is computed for the given internal frame.
+
+        Returns:
+            A new InternalFrame instance with the row position column computed.
+        """
+        return InternalFrame.create(
+            ordered_dataframe=self.ordered_dataframe.ensure_row_position_column(),
+            data_column_pandas_labels=self.data_column_pandas_labels,
+            data_column_snowflake_quoted_identifiers=self.data_column_snowflake_quoted_identifiers,
+            data_column_pandas_index_names=self.data_column_pandas_index_names,
+            index_column_pandas_labels=self.index_column_pandas_labels,
+            index_column_snowflake_quoted_identifiers=self.index_column_snowflake_quoted_identifiers,
+        )
+
+    def ensure_row_count_column(self) -> "InternalFrame":
+        """
+        Ensure the row count column is computed for the given internal frame.
+
+        Returns:
+            A new InternalFrame instance with the row count column computed.
+        """
+        return InternalFrame.create(
+            ordered_dataframe=self.ordered_dataframe.ensure_row_count_column(),
+            data_column_pandas_labels=self.data_column_pandas_labels,
+            data_column_snowflake_quoted_identifiers=self.data_column_snowflake_quoted_identifiers,
+            data_column_pandas_index_names=self.data_column_pandas_index_names,
+            index_column_pandas_labels=self.index_column_pandas_labels,
+            index_column_snowflake_quoted_identifiers=self.index_column_snowflake_quoted_identifiers,
+        )
+
+    def append_column(
+        self, pandas_label: Hashable, value: SnowparkColumn
+    ) -> "InternalFrame":
+        """
+        Append a column to this frame. The column is added at the end. For a frame with MultiIndex columns, it
+        automatically fills the missing levels with None. For example, in a table with MultiIndex columns like
+        ("A", "col1"), ("A", "col2"), ("B", "col1"), ("B", "col2"), appending a count column "cnt" will produce
+        a column labelled ("cnt", None).
+
+        Args:
+            pandas_label: pandas label for column to be inserted.
+            value: SnowparkColumn.
+
+        Returns:
+            A new InternalFrame with the new column.
+        """
+        # +---------------+---------------+---------------+---------------+     +---------------+
+        # | ("A", "col1") | ("A", "col2") | ("B", "col1") | ("B", "col2") |     |     "cnt"     |
+        # +---------------+---------------+---------------+---------------+  +  +---------------+
+        # |     . . .     |     . . .     |     . . .     |     . . .     |     |     . . .     |
+        # +---------------+---------------+---------------+---------------+     +---------------+
+        #
+        # Appending the column "cnt" to the table above will produce the following table:
+        # +---------------+---------------+---------------+---------------+---------------+
+        # | ("A", "col1") | ("A", "col2") | ("B", "col1") | ("B", "col2") | ("cnt", None) |
+        # +---------------+---------------+---------------+---------------+---------------+
+        # |     . . .     |     . . .     |     . . .     |     . . .     |     . . .     |
+        # +---------------+---------------+---------------+---------------+---------------+
+
+        # Generate label for the column to be appended.
+        nlevels = self.num_index_levels(axis=1)
+        pandas_label = fill_missing_levels_for_pandas_label(
+            pandas_label, nlevels, 0, None
+        )
+
+        # Generate snowflake quoted identifier for new column to be added.
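+        # generate_snowflake_quoted_identifiers derives the new identifier from the pandas
+        # label and avoids clashes with identifiers already projected by the underlying
+        # ordered dataframe.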
+ new_column_identifier = ( + self.ordered_dataframe.generate_snowflake_quoted_identifiers( + pandas_labels=[pandas_label], + )[0] + ) + new_ordered_dataframe = append_columns( + self.ordered_dataframe, new_column_identifier, value + ) + return InternalFrame.create( + ordered_dataframe=new_ordered_dataframe, + data_column_pandas_labels=self.data_column_pandas_labels + [pandas_label], + data_column_snowflake_quoted_identifiers=self.data_column_snowflake_quoted_identifiers + + [new_column_identifier], + data_column_pandas_index_names=self.data_column_pandas_index_names, + index_column_pandas_labels=self.index_column_pandas_labels, + index_column_snowflake_quoted_identifiers=self.index_column_snowflake_quoted_identifiers, + ) + + def project_columns( + self, + pandas_labels: list[Hashable], + column_objects: list[SnowparkColumn], + ) -> "InternalFrame": + """ + Project new columns with column_objects as the new data columns for the new Internal Frame. + The original index columns, ordering columns and row position columns are still used as the + index columns, ordering columns and row position columns for the new Internal Frame. + + * Note that this is different with append column in the sense that the data columns of the + original data frame will not be part of the data columns of the result dataframe. The data + column of the result dataframe only contains the new projected data columns. + + Args: + pandas_labels: The pandas labels for the newly projected data columns + column_objects: the Snowpark columns used to project the new data columns + + Returns: + A new InternalFrame with the newly projected columns as data column + """ + new_column_identifiers = ( + self.ordered_dataframe.generate_snowflake_quoted_identifiers( + pandas_labels=pandas_labels, + ) + ) + new_ordered_dataframe = append_columns( + self.ordered_dataframe, new_column_identifiers, column_objects + ) + return InternalFrame.create( + ordered_dataframe=new_ordered_dataframe, + data_column_pandas_labels=pandas_labels, + data_column_snowflake_quoted_identifiers=new_column_identifiers, + data_column_pandas_index_names=self.data_column_pandas_index_names, + index_column_pandas_labels=self.index_column_pandas_labels, + index_column_snowflake_quoted_identifiers=self.index_column_snowflake_quoted_identifiers, + ) + + def rename_snowflake_identifiers( + self, old_to_new_identifiers: dict[str, str] + ) -> "InternalFrame": + """ + Rename columns for underlying ordered dataframe. + + Args: + old_to_new_identifiers: A dictionary from old to new identifiers name. + Identifiers which do not occur in dictionary are not renamed. + + Returns: + A new InternalFrame instance after rename. + + Raises: + KeyError if columns are not index or data column of the current internal frame. + """ + if not old_to_new_identifiers: + return self + + ordered_dataframe = self.ordered_dataframe + internal_frame_column_quoted_identifiers = ( + self.index_column_snowflake_quoted_identifiers + + self.data_column_snowflake_quoted_identifiers + ) + for old_id in old_to_new_identifiers: + if old_id not in internal_frame_column_quoted_identifiers: + raise KeyError( + f"Column not found: '{old_id}'." 
+ f" Internal frame has following data and index columns: {internal_frame_column_quoted_identifiers}" + ) + select_list = [] + any_column_to_rename = False + ordering_and_row_position_columns = ( + ordered_dataframe.ordering_column_snowflake_quoted_identifiers + + [ordered_dataframe.row_position_snowflake_quoted_identifier] + if ordered_dataframe.row_position_snowflake_quoted_identifier is not None + else [] + ) + for old_id in ordered_dataframe.projected_column_snowflake_quoted_identifiers: + # Alias to new identifier name if present in 'old_to_new_identifiers', + # otherwise leave unchanged. + new_id = old_to_new_identifiers.get(old_id, old_id) + if old_id == new_id: + # retain the original column + select_list.append(old_id) + else: + select_list.append(col(old_id).as_(new_id)) + # if the old column is part of the ordering or row position columns, retains the column + # as part of the projected columns. + if old_id in ordering_and_row_position_columns: + select_list.append(old_id) + + any_column_to_rename = any_column_to_rename or new_id != old_id + if not any_column_to_rename: + # This is possible when values in 'old_to_new_identifiers' are same as keys. + return self + ordered_dataframe = ordered_dataframe.select(select_list) + + def get_updated_identifiers(identifiers: list[str]) -> list[str]: + """ + Get the new identifier after rename, and if not exist in the rename map, + no rename happens, the original name is returned + + Args: + identifiers: List of identifiers to get updated identifiers. + + Returns: + A list of identifiers after rename, if not exist in the rename map, + original name is returned. + """ + return [old_to_new_identifiers.get(i, i) for i in identifiers] + + return InternalFrame.create( + ordered_dataframe=ordered_dataframe, + data_column_pandas_labels=self.data_column_pandas_labels, + data_column_snowflake_quoted_identifiers=get_updated_identifiers( + self.data_column_snowflake_quoted_identifiers + ), + data_column_pandas_index_names=self.data_column_pandas_index_names, + index_column_pandas_labels=self.index_column_pandas_labels, + index_column_snowflake_quoted_identifiers=get_updated_identifiers( + self.index_column_snowflake_quoted_identifiers + ), + ) + + def update_snowflake_quoted_identifiers_with_expressions( + self, + quoted_identifier_to_column_map: dict[str, SnowparkColumn], + ) -> UpdatedInternalFrameResult: + """ + Points Snowflake quoted identifiers to column expression given by `quoted_identifier_to_column_map`. + + This function takes a mapping from existing snowflake quoted identifiers to + new Snowpark column expressions and points the existing quoted identifiers to the + column expressions provided by the mapping. For optimization purposes, + existing expressions are kept as columns. This does not change pandas labels. + + The process involves the following steps: + + 1. Create a list of new snowflake quoted column identifiers from existing snowflake quoted + column identifiers (keys of `quoted_identifier_to_column_map`) to prevent naming conflicts. + 2. Append new Snowpark columns (values of `quoted_identifier_to_column_map`) + to the end of the Snowpark DataFrame with new snowflake quoted column identifiers + generated at step 1. + 3. 
Update index and data column identifiers in the internal frame, by replacing existing
+           snowflake quoted identifiers (keys of `quoted_identifier_to_column_map`)
+           with new snowflake quoted column identifiers created in step 1.
+
+        Args:
+            quoted_identifier_to_column_map (Dict[str, SnowparkColumn]): A dictionary mapping
+                existing snowflake quoted identifiers to new Snowpark columns.
+                As keys of a dictionary, all snowflake column identifiers are unique here and
+                must be index columns and data columns in the original internal frame.
+
+        Returns:
+            UpdatedInternalFrameResult: A tuple containing the new InternalFrame with updated column references, and a mapping
+            of the old column ids to the new column ids.
+
+        Raises:
+            ValueError if any key of quoted_identifier_to_column_map is not in the data or index columns of the internal frame.
+
+        Example:
+            `update_snowflake_quoted_identifiers_with_expressions(quoted_identifier_to_column_map={'"A"' : lit(10), '"B"': col('"A"') + col('"B"')}).frame`
+            The internal frame has pandas labels ['pd_a', 'pd_b', 'pd_a'] (there can be duplicates),
+            mapping to the snowflake quoted identifiers ['"A"', '"B"', '"C"'], i.e. 'pd_a' -> "A", 'pd_b' -> "B", 'pd_a' -> "C".
+            Index column identifiers are ['"A"'] and data column identifiers are ['"B"', '"C"'].
+            Calling this function will now create new identifiers (and keep the old ones), so that
+            'pd_a' -> lit(10), 'pd_b' -> col('"A"') + col('"B"'), 'pd_a' -> "C".
+            For this, the function generates new aliases, e.g. '"A2"' for lit(10), and '"B2"' for col('"A"') + col('"B"').
+            Thus, after applying this function the snowpark dataframe backing this internal frame has
+            ['"A"', '"B"', '"C"', '"A2"', '"B2"'] as quoted identifiers.
+            Index column identifiers become ['"A2"'] and data column identifiers are still ['"B2"', '"C"'].
+ """ + # no-op + if not quoted_identifier_to_column_map: + return UpdatedInternalFrameResult(self, {}) + + all_data_index_identifiers = set( + self.data_column_snowflake_quoted_identifiers + + self.index_column_snowflake_quoted_identifiers + ) + for identifier in quoted_identifier_to_column_map: + if identifier not in all_data_index_identifiers: + raise ValueError(f"{identifier} is not in {all_data_index_identifiers}") + + existing_id_to_new_id_mapping = {} + new_columns = [] + for ( + existing_identifier, + column_expression, + ) in quoted_identifier_to_column_map.items(): + new_identifier = ( + self.ordered_dataframe.generate_snowflake_quoted_identifiers( + pandas_labels=[ + extract_pandas_label_from_snowflake_quoted_identifier( + existing_identifier + ) + ], + )[0] + ) + existing_id_to_new_id_mapping[existing_identifier] = new_identifier + new_columns.append(column_expression) + new_ordered_dataframe = append_columns( + self.ordered_dataframe, + list(existing_id_to_new_id_mapping.values()), + new_columns, + ) + # update index_column_snowflake_quoted_identifiers and data_column_snowflake_quoted_identifiers + # the order of index/data_column_snowflake_quoted_identifiers is not changed so we can still + # keep the correct mapping between quoted identifiers and pandas labels + new_index_column_snowflake_quoted_identifiers = [ + existing_id_to_new_id_mapping.get(identifier, identifier) + for identifier in self.index_column_snowflake_quoted_identifiers + ] + new_data_column_snowflake_quoted_identifiers = [ + existing_id_to_new_id_mapping.get(identifier, identifier) + for identifier in self.data_column_snowflake_quoted_identifiers + ] + + return UpdatedInternalFrameResult( + InternalFrame.create( + ordered_dataframe=new_ordered_dataframe, + data_column_pandas_labels=self.data_column_pandas_labels, + data_column_snowflake_quoted_identifiers=new_data_column_snowflake_quoted_identifiers, + data_column_pandas_index_names=self.data_column_pandas_index_names, + index_column_pandas_labels=self.index_column_pandas_labels, + index_column_snowflake_quoted_identifiers=new_index_column_snowflake_quoted_identifiers, + ), + existing_id_to_new_id_mapping, + ) + + def apply_snowpark_function_to_data_columns( + self, snowpark_func: Callable[[Any], SnowparkColumn] + ) -> "InternalFrame": + """ + Apply snowpark function callable to data columns of an InternalFrame. The snowflake quoted identifiers + are preserved. + + Arguments: + snowpark_func: Snowpark function to apply to data columns of underlying snowpark df. + + Returns: + InternalFrame with snowpark_func applies to data columns of original frame, all other columns remain unchanged. + """ + new_internal_frame = self.update_snowflake_quoted_identifiers_with_expressions( + { + snowflake_quoted_identifier: snowpark_func(snowflake_quoted_identifier) + for snowflake_quoted_identifier in self.data_column_snowflake_quoted_identifiers + } + ).frame + return new_internal_frame + + def select_active_columns(self) -> "InternalFrame": + """ + Select active columns of the current internal frame, the active columns include index + data columns, + ordering columns and row position column if exists. This function is used to re-project all active columns + in the ordered dataframe, and drop off unnecessary columns from the projected columns of the ordered dataframe. 
+ + Returns: + A new InternalFrame with the associated ordered dataframe contains the following projected columns: + 1) index + data columns + 2) ordering columns + 3) row position column if exists + + """ + active_column_quoted_identifiers = ( + self.index_column_snowflake_quoted_identifiers + + self.data_column_snowflake_quoted_identifiers + ) + # add the missing ordering columns + active_column_quoted_identifiers += [ + quoted_identifier + for quoted_identifier in self.ordering_column_snowflake_quoted_identifiers + if quoted_identifier not in active_column_quoted_identifiers + ] + + if ( + self.row_position_snowflake_quoted_identifier is not None + and self.row_position_snowflake_quoted_identifier + not in active_column_quoted_identifiers + ): + active_column_quoted_identifiers.append( + self.row_position_snowflake_quoted_identifier + ) + + return InternalFrame.create( + ordered_dataframe=self.ordered_dataframe.select( + active_column_quoted_identifiers + ), + index_column_pandas_labels=self.index_column_pandas_labels, + index_column_snowflake_quoted_identifiers=self.index_column_snowflake_quoted_identifiers, + data_column_pandas_labels=self.data_column_pandas_labels, + data_column_snowflake_quoted_identifiers=self.data_column_snowflake_quoted_identifiers, + data_column_pandas_index_names=self.data_column_pandas_index_names, + ) + + def strip_duplicates( + self: "InternalFrame", quoted_identifiers: list[str] + ) -> "InternalFrame": + """ + When assigning frames via index operations for duplicates only the last entry is used, as entries are repeatedly overwritten. + For example writing a series to a key [0, 1, 0] with values [1,2,3] will put value 2 to position 1, and value 3 to position 0. + This function strips the preceding index/value rows to emulate repeated writes. + + Args: + quoted_identifiers: the column identifiers to use for creating individual groups from which to take the last element. + + Returns: + new internal frame with unique index. + """ + + frame = self.ensure_row_position_column() + + # To remove the duplicates, first compute via windowing over index columns the value of the last row position. + # with this join then select only the relevant rows. Note that an EXISTS subquery doesn't work here because + # Snowflake fails with a non-supported subquery expression error for LAST_VALUE. 
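+        # The query built below has roughly the following shape (illustrative SQL):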
+ # SELECT a.* EXCLUDE (pos) FROM df a JOIN (SELECT DISTINCT LAST_VALUE(pos) OVER + # (PARTITION BY (idx, other_idx) ORDER BY pos) AS pos FROM df) b ON a.pos = b.pos; + + assert len(quoted_identifiers) == len( + set(quoted_identifiers) + & set(frame.ordered_dataframe.projected_column_snowflake_quoted_identifiers) + ), "could not find all quoted identifiers in frame" + + relevant_last_value_row_positions_quoted_identifier = ( + frame.ordered_dataframe.generate_snowflake_quoted_identifiers( + pandas_labels=[ROW_POSITION_COLUMN_LABEL], + )[0] + ) + + relevant_last_value_row_positions = get_distinct_rows( + frame.ordered_dataframe.select( + last_value(col(frame.row_position_snowflake_quoted_identifier)) + .over( + Window.partition_by(quoted_identifiers).order_by( + frame.row_position_snowflake_quoted_identifier + ) + ) + .as_(relevant_last_value_row_positions_quoted_identifier) + ) + ) + + joined_ordered_dataframe = frame.ordered_dataframe.join( + right=relevant_last_value_row_positions, + left_on_cols=[frame.row_position_snowflake_quoted_identifier], + right_on_cols=[relevant_last_value_row_positions_quoted_identifier], + how="inner", + ) + + # Because we reuse row position to select the relevant columns, we need to + # generate a new row position column here so locational indexing after this operation + # continues to work correctly. + new_ordered_dataframe = joined_ordered_dataframe.ensure_row_position_column() + return InternalFrame.create( + ordered_dataframe=new_ordered_dataframe, + data_column_pandas_labels=frame.data_column_pandas_labels, + data_column_snowflake_quoted_identifiers=frame.data_column_snowflake_quoted_identifiers, + data_column_pandas_index_names=frame.data_column_pandas_index_names, + index_column_pandas_labels=frame.index_column_pandas_labels, + index_column_snowflake_quoted_identifiers=frame.index_column_snowflake_quoted_identifiers, + ) + + def filter( + self: "InternalFrame", expr: Union[SnowparkColumn, str] + ) -> "InternalFrame": + """ + A helper method to apply filter on the internal frame + Args: + expr: the expression of the filter + + Returns: + The internal frame after filtering + """ + return InternalFrame.create( + ordered_dataframe=self.ordered_dataframe.filter(expr), + data_column_pandas_labels=self.data_column_pandas_labels, + data_column_snowflake_quoted_identifiers=self.data_column_snowflake_quoted_identifiers, + data_column_pandas_index_names=self.data_column_pandas_index_names, + index_column_pandas_labels=self.index_column_pandas_labels, + index_column_snowflake_quoted_identifiers=self.index_column_snowflake_quoted_identifiers, + ) + + def normalize_snowflake_quoted_identifiers_with_pandas_label( + self, + ) -> "InternalFrame": + """ + Normalize snowflake quoted identifiers for index and data columns based on the pandas label to make sure + the quoted identifier is in format of